001// License: GPL. For details, see LICENSE file. 002package org.openstreetmap.josm.tools; 003 004//// Taken from http://www.bmsi.com/java/#diff 005 006// http://www.bmsi.com/java/DiffPrint.java could also be useful 007 008/* 009 * $Log: Diff.java,v $ 010 * Revision 1.15 2013/04/01 16:27:31 stuart 011 * Fix DiffPrint unified format with test cases. 012 * Begin porting some diff-2.7 features. 013 * 014 * Revision 1.14 2010/03/03 21:21:25 stuart 015 * Test new direct equivalence API 016 * 017 * Revision 1.13 2009/12/07 17:43:17 stuart 018 * Compute equivMax for int[] ctor 019 * 020 * Revision 1.12 2009/12/07 17:34:46 stuart 021 * Ctor with int[]. 022 * 023 * Revision 1.11 2009/11/15 01:11:54 stuart 024 * Diff doesn't need to be generic 025 * 026 * Revision 1.10 2009/11/15 00:54:03 stuart 027 * Update to Java 5 containers 028 * 029 * Revision 1.7 2009/01/19 03:05:26 stuart 030 * Fix StackOverflow bug with heuristic on reported by Jimmy Han. 031 * 032 * Revision 1.6 2003/03/06 22:51:32 stuart 033 * Convert to CVS 034 * 035 * Revision 1.5 2002/07/19 19:14:40 stuart 036 * fix reverseScript, make change ctor public, update docs 037 * 038 * Revision 1.4 2002/04/09 17:53:39 stuart 039 * More flexible interface for diff() function. 040 * 041 * Revision 1.3 2000/03/03 21:58:03 stuart 042 * move discard_confusing_lines and shift_boundaries to class file_data 043 * 044 * Revision 1.2 2000/03/02 16:37:38 stuart 045 * Add GPL and copyright 046 * 047 */ 048 049import java.util.HashMap; 050import java.util.Map; 051 052/** A class to compare vectors of objects. The result of comparison 053 is a list of <code>change</code> objects which form an 054 edit script. The objects compared are traditionally lines 055 of text from two files. Comparison options such as "ignore 056 whitespace" are implemented by modifying the <code>equals</code> 057 and <code>hashcode</code> methods for the objects compared. 058<p> 059 The basic algorithm is described in: <br> 060 "An O(ND) Difference Algorithm and its Variations", Eugene Myers, 061 Algorithmica Vol. 1 No. 2, 1986, p 251. 062<p> 063 This class outputs different results from GNU diff 1.15 on some 064 inputs. Our results are actually better (smaller change list, smaller 065 total size of changes), but it would be nice to know why. Perhaps 066 there is a memory overwrite bug in GNU diff 1.15. 067 068 @author Stuart D. Gathman, translated from GNU diff 1.15 069 Copyright (C) 2000 Business Management Systems, Inc. 070<p> 071 This program is free software; you can redistribute it and/or modify 072 it under the terms of the GNU General Public License as published by 073 the Free Software Foundation; either version 1, or (at your option) 074 any later version. 075<p> 076 This program is distributed in the hope that it will be useful, 077 but WITHOUT ANY WARRANTY; without even the implied warranty of 078 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 079 GNU General Public License for more details. 080<p> 081 You should have received a copy of the <a href=COPYING.txt> 082 GNU General Public License</a> 083 along with this program; if not, write to the Free Software 084 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 085 */ 086public class Diff { 087 088 /** Prepare to find differences between two arrays. Each element of 089 the arrays is translated to an "equivalence number" based on 090 the result of <code>equals</code>. The original Object arrays 091 are no longer needed for computing the differences. They will 092 be needed again later to print the results of the comparison as 093 an edit script, if desired. 094 * @param a first array 095 * @param b second array 096 */ 097 public Diff(Object[] a, Object[] b) { 098 Map<Object, Integer> h = new HashMap<>(a.length + b.length); 099 filevec = new FileData[] {new FileData(a, h), new FileData(b, h)}; 100 } 101 102 /** 1 more than the maximum equivalence value used for this or its 103 sibling file. */ 104 private int equivMax = 1; 105 106 private int[] xvec, yvec; /* Vectors being compared. */ 107 private int[] fdiag; /* Vector, indexed by diagonal, containing 108 the X coordinate of the point furthest 109 along the given diagonal in the forward 110 search of the edit matrix. */ 111 private int[] bdiag; /* Vector, indexed by diagonal, containing 112 the X coordinate of the point furthest 113 along the given diagonal in the backward 114 search of the edit matrix. */ 115 private int fdiagoff, bdiagoff; 116 private final FileData[] filevec; 117 private int cost; 118 119 /** 120 * Find the midpoint of the shortest edit script for a specified 121 * portion of the two files. 122 * 123 * We scan from the beginnings of the files, and simultaneously from the ends, 124 * doing a breadth-first search through the space of edit-sequence. 125 * When the two searches meet, we have found the midpoint of the shortest 126 * edit sequence. 127 * 128 * The value returned is the number of the diagonal on which the midpoint lies. 129 * The diagonal number equals the number of inserted lines minus the number 130 * of deleted lines (counting only lines before the midpoint). 131 * The edit cost is stored into COST; this is the total number of 132 * lines inserted or deleted (counting only lines before the midpoint). 133 * 134 * This function assumes that the first lines of the specified portions 135 * of the two files do not match, and likewise that the last lines do not 136 * match. The caller must trim matching lines from the beginning and end 137 * of the portions it is going to specify. 138 * 139 * Note that if we return the "wrong" diagonal value, or if 140 * the value of bdiag at that diagonal is "wrong", 141 * the worst this can do is cause suboptimal diff output. 142 * It cannot cause incorrect diff output. 143 * @param xoff xoff 144 * @param xlim xlim 145 * @param yoff yoff 146 * @param ylim ylim 147 * @return midpoint of the shortest edit script 148 */ 149 private int diag(int xoff, int xlim, int yoff, int ylim) { 150 final int[] fd = fdiag; // Give the compiler a chance. 151 final int[] bd = bdiag; // Additional help for the compiler. 152 final int[] xv = xvec; // Still more help for the compiler. 153 final int[] yv = yvec; // And more and more . . . 154 final int dmin = xoff - ylim; // Minimum valid diagonal. 155 final int dmax = xlim - yoff; // Maximum valid diagonal. 156 final int fmid = xoff - yoff; // Center diagonal of top-down search. 157 final int bmid = xlim - ylim; // Center diagonal of bottom-up search. 158 int fmin = fmid, fmax = fmid; // Limits of top-down search. 159 int bmin = bmid, bmax = bmid; // Limits of bottom-up search. 160 // True if southeast corner is on an odd diagonal with respect to the northwest. 161 final boolean odd = (fmid - bmid & 1) != 0; 162 163 fd[fdiagoff + fmid] = xoff; 164 bd[bdiagoff + bmid] = xlim; 165 166 for (int c = 1;; ++c) { 167 int d; /* Active diagonal. */ 168 169 /* Extend the top-down search by an edit step in each diagonal. */ 170 if (fmin > dmin) { 171 fd[fdiagoff + --fmin - 1] = -1; 172 } else { 173 ++fmin; 174 } 175 if (fmax < dmax) { 176 fd[fdiagoff + ++fmax + 1] = -1; 177 } else { 178 --fmax; 179 } 180 for (d = fmax; d >= fmin; d -= 2) { 181 int x; 182 int y; 183 int tlo = fd[fdiagoff + d - 1]; 184 int thi = fd[fdiagoff + d + 1]; 185 186 if (tlo >= thi) { 187 x = tlo + 1; 188 } else { 189 x = thi; 190 } 191 y = x - d; 192 while (x < xlim && y < ylim && xv[x] == yv[y]) { 193 ++x; ++y; 194 } 195 fd[fdiagoff + d] = x; 196 if (odd && bmin <= d && d <= bmax && bd[bdiagoff + d] <= fd[fdiagoff + d]) { 197 cost = 2 * c - 1; 198 return d; 199 } 200 } 201 202 /* Similar extend the bottom-up search. */ 203 if (bmin > dmin) { 204 bd[bdiagoff + --bmin - 1] = Integer.MAX_VALUE; 205 } else { 206 ++bmin; 207 } 208 if (bmax < dmax) { 209 bd[bdiagoff + ++bmax + 1] = Integer.MAX_VALUE; 210 } else { 211 --bmax; 212 } 213 for (d = bmax; d >= bmin; d -= 2) { 214 int x, y, tlo = bd[bdiagoff + d - 1], thi = bd[bdiagoff + d + 1]; 215 216 if (tlo < thi) { 217 x = tlo; 218 } else { 219 x = thi - 1; 220 } 221 y = x - d; 222 while (x > xoff && y > yoff && xv[x - 1] == yv[y - 1]) { 223 --x; --y; 224 } 225 bd[bdiagoff + d] = x; 226 if (!odd && fmin <= d && d <= fmax && bd[bdiagoff + d] <= fd[fdiagoff + d]) { 227 cost = 2 * c; 228 return d; 229 } 230 } 231 } 232 } 233 234 /** 235 * Compare in detail contiguous subsequences of the two files 236 * which are known, as a whole, to match each other. 237 * 238 * The results are recorded in the vectors filevec[N].changed_flag, by 239 * storing a 1 in the element for each line that is an insertion or deletion. 240 * 241 * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1. 242 * 243 * Note that XLIM, YLIM are exclusive bounds. 244 * All line numbers are origin-0 and discarded lines are not counted. 245 * @param xoff xoff 246 * @param xlim xlim 247 * @param yoff yoff 248 * @param ylim ylim 249 */ 250 private void compareseq(int xoff, int xlim, int yoff, int ylim) { 251 /* Slide down the bottom initial diagonal. */ 252 while (xoff < xlim && yoff < ylim && xvec[xoff] == yvec[yoff]) { 253 ++xoff; ++yoff; 254 } 255 /* Slide up the top initial diagonal. */ 256 while (xlim > xoff && ylim > yoff && xvec[xlim - 1] == yvec[ylim - 1]) { 257 --xlim; --ylim; 258 } 259 260 /* Handle simple cases. */ 261 if (xoff == xlim) { 262 while (yoff < ylim) { 263 filevec[1].changedFlag[1+filevec[1].realindexes[yoff++]] = true; 264 } 265 } else if (yoff == ylim) { 266 while (xoff < xlim) { 267 filevec[0].changedFlag[1+filevec[0].realindexes[xoff++]] = true; 268 } 269 } else { 270 /* Find a point of correspondence in the middle of the files. */ 271 272 int d = diag(xoff, xlim, yoff, ylim); 273 int c = cost; 274 int b = bdiag[bdiagoff + d]; 275 276 if (c == 1) 277 /* This should be impossible, because it implies that 278 one of the two subsequences is empty, 279 and that case was handled above without calling `diag'. 280 Let's verify that this is true. */ 281 throw new IllegalArgumentException("Empty subsequence"); 282 else { 283 /* Use that point to split this problem into two subproblems. */ 284 compareseq(xoff, b, yoff, b - d); 285 /* This used to use f instead of b, 286 but that is incorrect! 287 It is not necessarily the case that diagonal d 288 has a snake from b to f. */ 289 compareseq(b, xlim, b - d, ylim); 290 } 291 } 292 } 293 294 /** Discard lines from one file that have no matches in the other file. 295 */ 296 private void discardConfusingLines() { 297 filevec[0].discardConfusingLines(filevec[1]); 298 filevec[1].discardConfusingLines(filevec[0]); 299 } 300 301 /** 302 * Adjust inserts/deletes of blank lines to join changes as much as possible. 303 */ 304 private void shiftBoundaries() { 305 filevec[0].shiftBoundaries(filevec[1]); 306 filevec[1].shiftBoundaries(filevec[0]); 307 } 308 309 /** 310 * Script builder. 311 * @since 10600 (functional interface) 312 */ 313 @FunctionalInterface 314 public interface ScriptBuilder { 315 /** 316 * Scan the tables of which lines are inserted and deleted, producing an edit script. 317 * @param changed0 true for lines in first file which do not match 2nd 318 * @param len0 number of lines in first file 319 * @param changed1 true for lines in 2nd file which do not match 1st 320 * @param len1 number of lines in 2nd file 321 * @return a linked list of changes - or null 322 * @since 10600 (renamed) 323 */ 324 Change buildScript( 325 boolean[] changed0, int len0, 326 boolean[] changed1, int len1 327 ); 328 } 329 330 /** 331 * Scan the tables of which lines are inserted and deleted, producing an edit script in reverse order. 332 */ 333 static class ReverseScript implements ScriptBuilder { 334 @Override 335 public Change buildScript( 336 final boolean[] changed0, int len0, 337 final boolean[] changed1, int len1) { 338 Change script = null; 339 int i0 = 0, i1 = 0; 340 while (i0 < len0 || i1 < len1) { 341 if (changed0[1+i0] || changed1[1+i1]) { 342 int line0 = i0, line1 = i1; 343 344 /* Find # lines changed here in each file. */ 345 while (changed0[1+i0]) { 346 ++i0; 347 } 348 while (changed1[1+i1]) { 349 ++i1; 350 } 351 352 /* Record this change. */ 353 script = new Change(line0, line1, i0 - line0, i1 - line1, script); 354 } 355 356 /* We have reached lines in the two files that match each other. */ 357 i0++; i1++; 358 } 359 360 return script; 361 } 362 } 363 364 static class ForwardScript implements ScriptBuilder { 365 /** Scan the tables of which lines are inserted and deleted, 366 producing an edit script in forward order. */ 367 @Override 368 public Change buildScript( 369 final boolean[] changed0, int len0, 370 final boolean[] changed1, int len1) { 371 Change script = null; 372 int i0 = len0, i1 = len1; 373 374 while (i0 >= 0 || i1 >= 0) { 375 if (changed0[i0] || changed1[i1]) { 376 int line0 = i0, line1 = i1; 377 378 /* Find # lines changed here in each file. */ 379 while (changed0[i0]) { 380 --i0; 381 } 382 while (changed1[i1]) { 383 --i1; 384 } 385 386 /* Record this change. */ 387 script = new Change(i0, i1, line0 - i0, line1 - i1, script); 388 } 389 390 /* We have reached lines in the two files that match each other. */ 391 i0--; i1--; 392 } 393 394 return script; 395 } 396 } 397 398 /** Standard Forward ScriptBuilder. */ 399 public static final ScriptBuilder forwardScript = new ForwardScript(); 400 /** Standard Reverse ScriptBuilder. */ 401 public static final ScriptBuilder reverseScript = new ReverseScript(); 402 403 /** 404 * Report the differences of two files. DEPTH is the current directory depth. 405 * @param reverse if {@code true} use {@link #reverseScript} else use {@link #forwardScript} 406 * @return the differences of two files 407 */ 408 public final Change diff2(final boolean reverse) { 409 return diff(reverse ? reverseScript : forwardScript); 410 } 411 412 /** 413 * Get the results of comparison as an edit script. The script 414 * is described by a list of changes. The standard ScriptBuilder 415 * implementations provide for forward and reverse edit scripts. 416 * Alternate implementations could, for instance, list common elements 417 * instead of differences. 418 * @param bld an object to build the script from change flags 419 * @return the head of a list of changes 420 */ 421 public Change diff(final ScriptBuilder bld) { 422 423 // Some lines are obviously insertions or deletions because they don't match anything. 424 // Detect them now, and avoid even thinking about them in the main comparison algorithm. 425 discardConfusingLines(); 426 427 // Now do the main comparison algorithm, considering just the undiscarded lines. 428 xvec = filevec[0].undiscarded; 429 yvec = filevec[1].undiscarded; 430 431 int diags = filevec[0].nondiscardedLines + filevec[1].nondiscardedLines + 3; 432 fdiag = new int[diags]; 433 fdiagoff = filevec[1].nondiscardedLines + 1; 434 bdiag = new int[diags]; 435 bdiagoff = filevec[1].nondiscardedLines + 1; 436 437 compareseq(0, filevec[0].nondiscardedLines, 438 0, filevec[1].nondiscardedLines); 439 fdiag = null; 440 bdiag = null; 441 442 // Modify the results slightly to make them prettier in cases where that can validly be done. 443 shiftBoundaries(); 444 445 // Get the results of comparison in the form of a chain of `struct change's -- an edit script. 446 return bld.buildScript( 447 filevec[0].changedFlag, 448 filevec[0].bufferedLines, 449 filevec[1].changedFlag, 450 filevec[1].bufferedLines 451 ); 452 } 453 454 /** The result of comparison is an "edit script": a chain of change objects. 455 Each change represents one place where some lines are deleted 456 and some are inserted. 457 458 LINE0 and LINE1 are the first affected lines in the two files (origin 0). 459 DELETED is the number of lines deleted here from file 0. 460 INSERTED is the number of lines inserted here in file 1. 461 462 If DELETED is 0 then LINE0 is the number of the line before 463 which the insertion was done; vice versa for INSERTED and LINE1. */ 464 465 public static class Change { 466 /** Previous or next edit command. */ 467 public Change link; 468 /** # lines of file 1 changed here. */ 469 public final int inserted; 470 /** # lines of file 0 changed here. */ 471 public final int deleted; 472 /** Line number of 1st deleted line. */ 473 public final int line0; 474 /** Line number of 1st inserted line. */ 475 public final int line1; 476 477 /** 478 * Cons an additional entry onto the front of an edit script OLD. 479 * LINE0 and LINE1 are the first affected lines in the two files (origin 0). 480 * DELETED is the number of lines deleted here from file 0. 481 * INSERTED is the number of lines inserted here in file 1. 482 * 483 * If DELETED is 0 then LINE0 is the number of the line before 484 * which the insertion was done; vice versa for INSERTED and LINE1. 485 * @param line0 first affected lines in the two files (origin 0) 486 * @param line1 first affected lines in the two files (origin 0) 487 * @param deleted the number of lines deleted here from file 0 488 * @param inserted the number of lines inserted here in file 1 489 * @param old edit script 490 */ 491 public Change(int line0, int line1, int deleted, int inserted, Change old) { 492 this.line0 = line0; 493 this.line1 = line1; 494 this.inserted = inserted; 495 this.deleted = deleted; 496 this.link = old; 497 } 498 499 /** 500 * Returns the number of insertions and deletions of this change as well as 501 * (recursively) the changes linked via {@link #link}. 502 * @return recursive number of insertions and deletions 503 */ 504 public int getTotalNumberOfChanges() { 505 return inserted + deleted + (link != null ? link.getTotalNumberOfChanges() : 0); 506 } 507 508 @Override 509 public String toString() { 510 String s = String.format("%d -%d +%d %d", line0, deleted, inserted, line1); 511 return (link != null) ? s + '\n' + link : s; 512 } 513 } 514 515 /** 516 * Data on one input file being compared. 517 */ 518 class FileData { 519 520 /** Allocate changed array for the results of comparison. */ 521 void clear() { 522 // Allocate a flag for each line of each file, saying whether that line is an insertion or deletion. 523 // Allocate an extra element, always zero, at each end of each vector. 524 changedFlag = new boolean[bufferedLines + 2]; 525 } 526 527 /** 528 * Return equiv_count[I] as the number of lines in this file that fall in equivalence class I. 529 * @return the array of equivalence class counts. 530 */ 531 int[] equivCount() { 532 int[] equivCount = new int[equivMax]; 533 for (int i = 0; i < bufferedLines; ++i) { 534 ++equivCount[equivs[i]]; 535 } 536 return equivCount; 537 } 538 539 /** 540 * Discard lines that have no matches in another file. 541 * 542 * A line which is discarded will not be considered by the actual comparison algorithm; 543 * it will be as if that line were not in the file. 544 * The file's `realindexes' table maps virtual line numbers 545 * (which don't count the discarded lines) into real line numbers; 546 * this is how the actual comparison algorithm produces results 547 * that are comprehensible when the discarded lines are counted. 548 * <p> 549 * When we discard a line, we also mark it as a deletion or insertion so that it will be printed in the output. 550 * @param f the other file 551 */ 552 void discardConfusingLines(FileData f) { 553 clear(); 554 // Set up table of which lines are going to be discarded. 555 final byte[] discarded = discardable(f.equivCount()); 556 557 // Don't really discard the provisional lines except when they occur in a run of discardables, 558 // with nonprovisionals at the beginning and end. 559 filterDiscards(discarded); 560 561 // Actually discard the lines. 562 discard(discarded); 563 } 564 565 /** 566 * Mark to be discarded each line that matches no line of another file. 567 * If a line matches many lines, mark it as provisionally discardable. 568 * @param counts The count of each equivalence number for the other file. 569 * @return 0=nondiscardable, 1=discardable or 2=provisionally discardable for each line 570 * @see #equivCount() 571 */ 572 private byte[] discardable(final int... counts) { 573 final int end = bufferedLines; 574 final byte[] discards = new byte[end]; 575 final int[] equivs = this.equivs; 576 int many = 5; 577 int tem = end / 64; 578 579 /* Multiply MANY by approximate square root of number of lines. 580 That is the threshold for provisionally discardable lines. */ 581 while ((tem = tem >> 2) > 0) { 582 many *= 2; 583 } 584 585 for (int i = 0; i < end; i++) { 586 int nmatch; 587 if (equivs[i] == 0) { 588 continue; 589 } 590 nmatch = counts[equivs[i]]; 591 if (nmatch == 0) { 592 discards[i] = 1; 593 } else if (nmatch > many) { 594 discards[i] = 2; 595 } 596 } 597 return discards; 598 } 599 600 /** 601 * Don't really discard the provisional lines except when they occur 602 * in a run of discardables, with nonprovisionals at the beginning and end. 603 * @param discards discards 604 */ 605 private void filterDiscards(final byte[] discards) { 606 final int end = bufferedLines; 607 608 for (int i = 0; i < end; i++) { 609 /* Cancel provisional discards not in middle of run of discards. */ 610 if (discards[i] == 2) { 611 discards[i] = 0; 612 } else if (discards[i] != 0) { 613 /* We have found a nonprovisional discard. */ 614 int j; 615 int length; 616 int provisional = 0; 617 618 /* Find end of this run of discardable lines. 619 Count how many are provisionally discardable. */ 620 for (j = i; j < end; j++) { 621 if (discards[j] == 0) { 622 break; 623 } 624 if (discards[j] == 2) { 625 ++provisional; 626 } 627 } 628 629 /* Cancel provisional discards at end, and shrink the run. */ 630 while (j > i && discards[j - 1] == 2) { 631 discards[--j] = 0; --provisional; 632 } 633 634 /* Now we have the length of a run of discardable lines 635 whose first and last are not provisional. */ 636 length = j - i; 637 638 /* If 1/4 of the lines in the run are provisional, 639 cancel discarding of all provisional lines in the run. */ 640 if (provisional * 4 > length) { 641 while (j > i) 642 if (discards[--j] == 2) { 643 discards[j] = 0; 644 } 645 } else { 646 int consec; 647 int minimum = 1; 648 int tem = length / 4; 649 650 /* MINIMUM is approximate square root of LENGTH/4. 651 A subrun of two or more provisionals can stand 652 when LENGTH is at least 16. 653 A subrun of 4 or more can stand when LENGTH >= 64. */ 654 while ((tem = tem >> 2) > 0) { 655 minimum *= 2; 656 } 657 minimum++; 658 659 /* Cancel any subrun of MINIMUM or more provisionals 660 within the larger run. */ 661 for (j = 0, consec = 0; j < length; j++) { 662 if (discards[i + j] != 2) { 663 consec = 0; 664 } else if (minimum == ++consec) { 665 /* Back up to start of subrun, to cancel it all. */ 666 j -= consec; 667 } else if (minimum < consec) { 668 discards[i + j] = 0; 669 } 670 } 671 672 /* Scan from beginning of run 673 until we find 3 or more nonprovisionals in a row 674 or until the first nonprovisional at least 8 lines in. 675 Until that point, cancel any provisionals. */ 676 for (j = 0, consec = 0; j < length; j++) { 677 if (j >= 8 && discards[i + j] == 1) { 678 break; 679 } 680 if (discards[i + j] == 2) { 681 consec = 0; discards[i + j] = 0; 682 } else if (discards[i + j] == 0) { 683 consec = 0; 684 } else { 685 consec++; 686 } 687 if (consec == 3) { 688 break; 689 } 690 } 691 692 /* I advances to the last line of the run. */ 693 i += length - 1; 694 695 /* Same thing, from end. */ 696 for (j = 0, consec = 0; j < length; j++) { 697 if (j >= 8 && discards[i - j] == 1) { 698 break; 699 } 700 if (discards[i - j] == 2) { 701 consec = 0; discards[i - j] = 0; 702 } else if (discards[i - j] == 0) { 703 consec = 0; 704 } else { 705 consec++; 706 } 707 if (consec == 3) { 708 break; 709 } 710 } 711 } 712 } 713 } 714 } 715 716 /** Actually discard the lines. 717 @param discards flags lines to be discarded 718 */ 719 private void discard(final byte[] discards) { 720 final int end = bufferedLines; 721 int j = 0; 722 for (int i = 0; i < end; ++i) { 723 if (discards[i] == 0) { 724 undiscarded[j] = equivs[i]; 725 realindexes[j++] = i; 726 } else { 727 changedFlag[1+i] = true; 728 } 729 } 730 nondiscardedLines = j; 731 } 732 733 FileData(int length) { 734 bufferedLines = length; 735 equivs = new int[length]; 736 undiscarded = new int[bufferedLines]; 737 realindexes = new int[bufferedLines]; 738 } 739 740 FileData(Object[] data, Map<Object, Integer> h) { 741 this(data.length); 742 // FIXME: diff 2.7 removes common prefix and common suffix 743 for (int i = 0; i < data.length; ++i) { 744 Integer ir = h.get(data[i]); 745 if (ir == null) { 746 equivs[i] = equivMax++; 747 h.put(data[i], equivs[i]); 748 } else { 749 equivs[i] = ir.intValue(); 750 } 751 } 752 } 753 754 /** 755 * Adjust inserts/deletes of blank lines to join changes as much as possible. 756 * 757 * We do something when a run of changed lines include a blank line at one end and have an excluded blank line at the other. 758 * We are free to choose which blank line is included. 759 * `compareseq' always chooses the one at the beginning, but usually it is cleaner to consider the following blank line 760 * to be the "change". The only exception is if the preceding blank line would join this change to other changes. 761 * @param f the file being compared against 762 */ 763 void shiftBoundaries(FileData f) { 764 final boolean[] changed = changedFlag; 765 final boolean[] otherChanged = f.changedFlag; 766 int i = 0; 767 int j = 0; 768 int iEnd = bufferedLines; 769 int preceding = -1; 770 int otherPreceding = -1; 771 772 for (;;) { 773 int start, end, otherStart; 774 775 /* Scan forwards to find beginning of another run of changes. 776 Also keep track of the corresponding point in the other file. */ 777 778 while (i < iEnd && !changed[1+i]) { 779 while (otherChanged[1+j++]) { 780 /* Non-corresponding lines in the other file 781 will count as the preceding batch of changes. */ 782 otherPreceding = j; 783 } 784 i++; 785 } 786 787 if (i == iEnd) { 788 break; 789 } 790 791 start = i; 792 otherStart = j; 793 794 boolean loop = true; 795 while (loop) { 796 /* Now find the end of this run of changes. */ 797 798 while (i < iEnd && changed[1+i]) { 799 i++; 800 } 801 end = i; 802 803 /* If the first changed line matches the following unchanged one, 804 and this run does not follow right after a previous run, 805 and there are no lines deleted from the other file here, 806 then classify the first changed line as unchanged 807 and the following line as changed in its place. */ 808 809 /* You might ask, how could this run follow right after another? 810 Only because the previous run was shifted here. */ 811 812 if (end != iEnd && equivs[start] == equivs[end] && !otherChanged[1+j] 813 && !((preceding >= 0 && start == preceding) || (otherPreceding >= 0 && otherStart == otherPreceding))) { 814 changed[1+end] = true; 815 changed[1+start++] = false; 816 ++i; 817 /* Since one line-that-matches is now before this run 818 instead of after, we must advance in the other file 819 to keep in synch. */ 820 ++j; 821 } else { 822 loop = false; 823 } 824 } 825 826 preceding = i; 827 otherPreceding = j; 828 } 829 } 830 831 /** Number of elements (lines) in this file. */ 832 private final int bufferedLines; 833 834 /** Vector, indexed by line number, containing an equivalence code for each line. 835 * It is this vector that is actually compared with that of another file to generate differences. */ 836 private final int[] equivs; 837 838 /** Vector, like the previous one except that the elements for discarded lines have been squeezed out. */ 839 private final int[] undiscarded; 840 841 /** Vector mapping virtual line numbers (not counting discarded lines) to real ones (counting those lines). 842 * Both are origin-0. */ 843 private final int[] realindexes; 844 845 /** Total number of nondiscarded lines. */ 846 private int nondiscardedLines; 847 848 /** Array, indexed by real origin-1 line number, containing true for a line that is an insertion or a deletion. 849 The results of comparison are stored here. */ 850 private boolean[] changedFlag; 851 } 852}