00001 <?php
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
/**
 * Line/token based diff engine.
 *
 * diff() marks, for two sequences, which elements of the first sequence are
 * "removed" and which elements of the second are "added"; everything else
 * forms a common subsequence. The core search is a bidirectional
 * divide-and-conquer ("middle snake") search in the style of Myers' O(ND)
 * algorithm. When the input is large the number of differences explored is
 * capped and a heuristic split point is used instead, in which case the
 * result is a valid but not necessarily longest common subsequence.
 *
 * Typical use:
 *   $engine = new WikiDiff3();
 *   $engine->diff($oldLines, $newLines);   // fills $removed / $added / $length
 *   $ranges = $engine->diff_range($oldLines, $newLines);
 */
class WikiDiff3 {

    // Reduced input sequences (only elements occurring in both inputs, with
    // the common prefix/suffix stripped) that the recursive search works on.
    private $from;
    private $to;
    // While diff() runs: lengths of the reduced sequences.
    // After diff() returns: lengths of the original inputs.
    private $m;
    private $n;

    // If (reduced m) * (reduced n) exceeds this, the difference limit is lowered.
    private $tooLong;
    // Exponent used to derive the lowered difference limit.
    private $powLimit;

    // Upper bound on the number of search rounds per find_middle_snake() call.
    private $maxDifferences;
    // True once getLcsLength() has recomputed $length after a heuristic run.
    private $lcsLengthCorrectedForHeuristic = false;

    // Length of the common subsequence found by the last diff().
    public $length;
    // $removed[$i] is true when element $i of the "from" input has no match.
    public $removed;
    // $added[$i] is true when element $i of the "to" input has no match.
    public $added;
    // True when the last diff() had to fall back to the heuristic split.
    public $heuristicUsed;

    /**
     * @param int   $tooLong  Threshold on the product of the reduced sequence
     *                        lengths above which the search is limited.
     * @param float $powLimit Exponent controlling how strongly the search is
     *                        limited once $tooLong is exceeded.
     */
    public function __construct($tooLong = 2000000, $powLimit = 1.45) {
        $this->tooLong = $tooLong;
        $this->powLimit = $powLimit;
    }

    /**
     * Compare two sequences and populate $removed, $added, $length and
     * $heuristicUsed.
     *
     * Elements are used as array keys internally, so they are expected to be
     * strings or integers. Both inputs must be lists indexed 0..count-1.
     *
     * @param array $from The "old" sequence.
     * @param array $to   The "new" sequence.
     */
    public function diff($from, $to) {
        $m = count($from);
        $n = count($to);

        // Reset per-run state. The correction flag must be cleared as well,
        // otherwise getLcsLength() would skip its correction when the same
        // instance is reused for a second heuristic diff.
        $this->heuristicUsed = false;
        $this->lcsLengthCorrectedForHeuristic = false;

        // Output flags for the ORIGINAL positions; everything starts out as
        // changed and is cleared as matches are found.
        $removed = $m > 0 ? array_fill(0, $m, true) : array();
        $added = $n > 0 ? array_fill(0, $n, true) : array();

        // Strip the common prefix.
        $i = 0;
        while ($i < $m && $i < $n && $from[$i] === $to[$i]) {
            $removed[$i] = $added[$i] = false;
            unset($from[$i], $to[$i]);
            ++$i;
        }

        // Strip the common suffix ($j - 1 elements end up stripped).
        $j = 1;
        while ($i + $j <= $m && $i + $j <= $n && $from[$m - $j] === $to[$n - $j]) {
            $removed[$m - $j] = $added[$n - $j] = false;
            unset($from[$m - $j], $to[$n - $j]);
            ++$j;
        }

        $this->from = $newFromIndex = $this->to = $newToIndex = array();

        // Drop elements that occur in only one of the two sequences: they can
        // never be part of a common subsequence. $newFromIndex / $newToIndex
        // map positions in the reduced sequences back to original positions.
        $shared = array();
        foreach ($from as $key) {
            $shared[$key] = false;
        }

        foreach ($to as $index => $el) {
            if (array_key_exists($el, $shared)) {
                // Element also occurs in $from: keep it.
                $this->to[] = $el;
                $shared[$el] = true;
                $newToIndex[] = $index;
            }
        }
        foreach ($from as $index => $el) {
            if ($shared[$el]) {
                // Element also occurs in $to: keep it.
                $this->from[] = $el;
                $newFromIndex[] = $index;
            }
        }

        unset($shared, $from, $to);

        $this->m = count($this->from);
        $this->n = count($this->to);

        // Flags for the REDUCED sequences, filled in by the search below.
        $this->removed = $this->m > 0 ? array_fill(0, $this->m, true) : array();
        $this->added = $this->n > 0 ? array_fill(0, $this->n, true) : array();

        if ($this->m == 0 || $this->n == 0) {
            $this->length = 0;
        } else {
            $this->maxDifferences = ceil(($this->m + $this->n) / 2.0);
            if ($this->m * $this->n > $this->tooLong) {
                // Input is too long for an exhaustive search: lower the limit.
                $this->maxDifferences = floor(pow($this->maxDifferences, $this->powLimit - 1.0));
                wfDebug("Limiting max number of differences to $this->maxDifferences\n");
            }

            // The reduced sequences may again share a prefix and a suffix;
            // mark those as matched before starting the recursive search.
            $max = min($this->m, $this->n);
            for ($forwardBound = 0; $forwardBound < $max
                    && $this->from[$forwardBound] === $this->to[$forwardBound];
                    ++$forwardBound) {
                $this->removed[$forwardBound] = $this->added[$forwardBound] = false;
            }

            $backBoundL1 = $this->m - 1;
            $backBoundL2 = $this->n - 1;

            while ($backBoundL1 >= $forwardBound && $backBoundL2 >= $forwardBound
                    && $this->from[$backBoundL1] === $this->to[$backBoundL2]) {
                $this->removed[$backBoundL1--] = $this->added[$backBoundL2--] = false;
            }

            // Scratch space shared by all recursion levels: two arrays of
            // per-diagonal values (forward and backward) and the snake triple
            // (start in from, start in to, length).
            $temp = array_fill(0, $this->m + $this->n + 1, 0);
            $V = array($temp, $temp);
            $snake = array(0, 0, 0);

            $this->length = $forwardBound + $this->m - $backBoundL1 - 1
                + $this->lcs_rec($forwardBound, $backBoundL1,
                    $forwardBound, $backBoundL2, $V, $snake);
        }

        // From here on $m / $n describe the original inputs again.
        $this->m = $m;
        $this->n = $n;

        // Account for the prefix ($i) and suffix ($j - 1) stripped at the top.
        $this->length += $i + $j - 1;

        // Translate matches found on the reduced sequences back to the
        // original positions.
        foreach ($this->removed as $key => $removed_elem) {
            if (!$removed_elem) {
                $removed[$newFromIndex[$key]] = false;
            }
        }
        foreach ($this->added as $key => $added_elem) {
            if (!$added_elem) {
                $added[$newToIndex[$key]] = false;
            }
        }
        $this->removed = $removed;
        $this->added = $added;
    }

    /**
     * Run diff() and collapse its per-element flags into changed ranges.
     *
     * @param array $from_lines The "old" sequence.
     * @param array $to_lines   The "new" sequence.
     * @return array List of RangeDifference objects, one per contiguous run
     *               of removed and/or added elements, in order.
     */
    public function diff_range($from_lines, $to_lines) {
        $this->diff($from_lines, $to_lines);
        unset($from_lines, $to_lines);

        $ranges = array();
        $xi = $yi = 0;
        while ($xi < $this->m || $yi < $this->n) {
            // Skip over elements that match on both sides.
            while ($xi < $this->m && $yi < $this->n
                    && !$this->removed[$xi]
                    && !$this->added[$yi]) {
                ++$xi;
                ++$yi;
            }

            // Collect the run of removed elements ...
            $xstart = $xi;
            while ($xi < $this->m && $this->removed[$xi]) {
                ++$xi;
            }

            // ... and the run of added elements that goes with it.
            $ystart = $yi;
            while ($yi < $this->n && $this->added[$yi]) {
                ++$yi;
            }

            if ($xi > $xstart || $yi > $ystart) {
                $ranges[] = new RangeDifference($xstart, $xi,
                    $ystart, $yi);
            }
        }
        return $ranges;
    }

    /**
     * Recursively find a common subsequence of from[$bottoml1..$topl1] and
     * to[$bottoml2..$topl2] (inclusive bounds on the reduced sequences),
     * clearing the matching entries in $this->removed / $this->added.
     *
     * @param int   $bottoml1 First index of the range in $this->from.
     * @param int   $topl1    Last index of the range in $this->from.
     * @param int   $bottoml2 First index of the range in $this->to.
     * @param int   $topl2    Last index of the range in $this->to.
     * @param array $V        Shared scratch arrays (see diff()).
     * @param array $snake    Shared snake triple, overwritten by each call.
     * @return int Number of matched elements found in the range.
     */
    private function lcs_rec($bottoml1, $topl1, $bottoml2, $topl2, &$V, &$snake) {
        // Nothing to match when either range is empty.
        if ($bottoml1 > $topl1 || $bottoml2 > $topl2) {
            return 0;
        }

        $d = $this->find_middle_snake($bottoml1, $topl1, $bottoml2,
            $topl2, $V, $snake);

        // Copy the snake out now: $snake is shared and gets overwritten by
        // the recursive calls below.
        $len = $snake[2];
        $startx = $snake[0];
        $starty = $snake[1];

        // The snake itself is a run of matching elements.
        for ($i = 0; $i < $len; ++$i) {
            $this->removed[$startx + $i] = $this->added[$starty + $i] = false;
        }

        if ($d > 1) {
            // Split around the snake and solve both halves.
            return $len
                + $this->lcs_rec($bottoml1, $startx - 1, $bottoml2,
                    $starty - 1, $V, $snake)
                + $this->lcs_rec($startx + $len, $topl1, $starty + $len,
                    $topl2, $V, $snake);
        } else if ($d == 1) {
            // Exactly one difference: everything after it was covered by the
            // snake above, so the elements before it match pairwise from the
            // bottom of the range.
            $max = min($startx - $bottoml1, $starty - $bottoml2);
            for ($i = 0; $i < $max; ++$i) {
                $this->removed[$bottoml1 + $i] =
                    $this->added[$bottoml2 + $i] = false;
            }
            return $max + $len;
        }
        return $len;
    }

    /**
     * Search forward from the bottom and backward from the top of the given
     * ranges until both searches overlap, and store the overlapping run of
     * matches ("middle snake") in $snake as (start in from, start in to,
     * length).
     *
     * If no overlap is found within the difference limit, a zero-length
     * snake at the point chosen by findMostProgress() is stored instead and
     * $this->heuristicUsed is set.
     *
     * @param int   $bottoml1 First index of the range in $this->from.
     * @param int   $topl1    Last index of the range in $this->from.
     * @param int   $bottoml2 First index of the range in $this->to.
     * @param int   $topl2    Last index of the range in $this->to.
     * @param array $V        Scratch arrays: $V[0] forward, $V[1] backward,
     *                        indexed by diagonal offset by the limit.
     * @param array $snake    Receives the snake triple.
     * @return int 2*d-1 or 2*d for an overlap found in round d, or the
     *             placeholder 5 when the heuristic was used (callers only
     *             distinguish 0, 1 and "more than 1").
     */
    private function find_middle_snake($bottoml1, $topl1, $bottoml2, $topl2, &$V, &$snake) {
        $from = &$this->from;
        $to = &$this->to;
        $V0 = &$V[0];
        $V1 = &$V[1];
        $snake0 = &$snake[0];
        $snake1 = &$snake[1];
        $snake2 = &$snake[2];
        $bottoml1_min_1 = $bottoml1 - 1;
        $bottoml2_min_1 = $bottoml2 - 1;
        // $N / $M: lengths of the two ranges being compared.
        $N = $topl1 - $bottoml1_min_1;
        $M = $topl2 - $bottoml2_min_1;
        $delta = $N - $M;
        $maxabsx = $N + $bottoml1;
        $maxabsy = $M + $bottoml2;
        $limit = min($this->maxDifferences, ceil(($N + $M) / 2));

        // 0 or 1, added to the start diagonal so that it has the same parity
        // as the current round; toggled after every round.
        $value_to_add_forward = $M & 1;
        $value_to_add_backward = $N & 1;

        // Diagonal ranges still worth exploring in each direction.
        $start_forward = -$M;
        $end_forward = $N;
        $start_backward = -$N;
        $end_backward = $M;

        $limit_min_1 = $limit - 1;
        $limit_plus_1 = $limit + 1;

        // Seed values read by round 0 of the forward / backward search.
        $V0[$limit_plus_1] = 0;
        $V1[$limit_min_1] = $N;

        if (($delta & 1) == 1) {
            // Odd delta: the overlap is detected while extending forward.
            for ($d = 0; $d <= $limit; ++$d) {
                $start_diag = max($value_to_add_forward + $start_forward, -$d);
                $end_diag = min($end_forward, $d);
                $value_to_add_forward = 1 - $value_to_add_forward;

                // Forward search on every second diagonal.
                for ($k = $start_diag; $k <= $end_diag; $k += 2) {
                    if ($k == -$d || ($k < $d
                            && $V0[$limit_min_1 + $k] < $V0[$limit_plus_1 + $k])) {
                        $x = $V0[$limit_plus_1 + $k];
                    } else {
                        $x = $V0[$limit_min_1 + $k] + 1;
                    }

                    $absx = $snake0 = $x + $bottoml1;
                    $absy = $snake1 = $x - $k + $bottoml2;

                    // Follow the run of equal elements along this diagonal.
                    while ($absx < $maxabsx && $absy < $maxabsy && $from[$absx] === $to[$absy]) {
                        ++$absx;
                        ++$absy;
                    }
                    $x = $absx - $bottoml1;

                    $snake2 = $absx - $snake0;
                    $V0[$limit + $k] = $x;
                    // Did the forward path reach the backward path?
                    if ($k >= $delta - $d + 1 && $k <= $delta + $d - 1
                            && $x >= $V1[$limit + $k - $delta]) {
                        return 2 * $d - 1;
                    }

                    // Narrow the diagonal range once an edge has been hit.
                    if ($x >= $N && $end_forward > $k - 1) {
                        $end_forward = $k - 1;
                    } else if ($absy - $bottoml2 >= $M) {
                        $start_forward = $k + 1;
                        $value_to_add_forward = 0;
                    }
                }

                $start_diag = max($value_to_add_backward + $start_backward, -$d);
                $end_diag = min($end_backward, $d);
                $value_to_add_backward = 1 - $value_to_add_backward;

                // Backward search on every second diagonal.
                for ($k = $start_diag; $k <= $end_diag; $k += 2) {
                    if ($k == $d
                            || ($k != -$d && $V1[$limit_min_1 + $k] < $V1[$limit_plus_1 + $k])) {
                        $x = $V1[$limit_min_1 + $k];
                    } else {
                        $x = $V1[$limit_plus_1 + $k] - 1;
                    }

                    $y = $x - $k - $delta;

                    $snake2 = 0;
                    while ($x > 0 && $y > 0
                            && $from[$x + $bottoml1_min_1] === $to[$y + $bottoml2_min_1]) {
                        --$x;
                        --$y;
                        ++$snake2;
                    }
                    $V1[$limit + $k] = $x;

                    // Narrow the diagonal range once an edge has been hit.
                    if ($x <= 0) {
                        $start_backward = $k + 1;
                        $value_to_add_backward = 0;
                    } else if ($y <= 0 && $end_backward > $k - 1) {
                        $end_backward = $k - 1;
                    }
                }
            }
        } else {
            // Even delta: the overlap is detected while extending backward.
            for ($d = 0; $d <= $limit; ++$d) {
                $start_diag = max($value_to_add_forward + $start_forward, -$d);
                $end_diag = min($end_forward, $d);
                $value_to_add_forward = 1 - $value_to_add_forward;

                // Forward search on every second diagonal.
                for ($k = $start_diag; $k <= $end_diag; $k += 2) {
                    if ($k == -$d
                            || ($k < $d && $V0[$limit_min_1 + $k] < $V0[$limit_plus_1 + $k])) {
                        $x = $V0[$limit_plus_1 + $k];
                    } else {
                        $x = $V0[$limit_min_1 + $k] + 1;
                    }

                    $absx = $snake0 = $x + $bottoml1;
                    $absy = $snake1 = $x - $k + $bottoml2;

                    // Follow the run of equal elements along this diagonal.
                    while ($absx < $maxabsx && $absy < $maxabsy && $from[$absx] === $to[$absy]) {
                        ++$absx;
                        ++$absy;
                    }
                    $x = $absx - $bottoml1;
                    $snake2 = $absx - $snake0;
                    $V0[$limit + $k] = $x;

                    // Narrow the diagonal range once an edge has been hit.
                    if ($x >= $N && $end_forward > $k - 1) {
                        $end_forward = $k - 1;
                    } else if ($absy - $bottoml2 >= $M) {
                        $start_forward = $k + 1;
                        $value_to_add_forward = 0;
                    }
                }

                $start_diag = max($value_to_add_backward + $start_backward, -$d);
                $end_diag = min($end_backward, $d);
                $value_to_add_backward = 1 - $value_to_add_backward;

                // Backward search on every second diagonal.
                for ($k = $start_diag; $k <= $end_diag; $k += 2) {
                    if ($k == $d
                            || ($k != -$d && $V1[$limit_min_1 + $k] < $V1[$limit_plus_1 + $k])) {
                        $x = $V1[$limit_min_1 + $k];
                    } else {
                        $x = $V1[$limit_plus_1 + $k] - 1;
                    }

                    $y = $x - $k - $delta;

                    $snake2 = 0;
                    while ($x > 0 && $y > 0
                            && $from[$x + $bottoml1_min_1] === $to[$y + $bottoml2_min_1]) {
                        --$x;
                        --$y;
                        ++$snake2;
                    }
                    $V1[$limit + $k] = $x;

                    // Did the backward path reach the forward path?
                    if ($k >= -$delta - $d && $k <= $d - $delta
                            && $x <= $V0[$limit + $k + $delta]) {
                        $snake0 = $bottoml1 + $x;
                        $snake1 = $bottoml2 + $y;
                        return 2 * $d;
                    }

                    // Narrow the diagonal range once an edge has been hit.
                    if ($x <= 0) {
                        $start_backward = $k + 1;
                        $value_to_add_backward = 0;
                    } else if ($y <= 0 && $end_backward > $k - 1) {
                        $end_backward = $k - 1;
                    }
                }
            }
        }

        // The limit was reached without the two searches meeting. Pretend a
        // zero-length snake sits at the point that made the most progress
        // and let the caller split the problem there.
        $most_progress = self::findMostProgress($M, $N, $limit, $V);

        $snake0 = $bottoml1 + $most_progress[0];
        $snake1 = $bottoml2 + $most_progress[1];
        $snake2 = 0;
        wfDebug("Computing the LCS is too expensive. Using a heuristic.\n");
        $this->heuristicUsed = true;
        // The real number of differences is unknown here; lcs_rec() only
        // cares whether it is greater than 1, so any such value will do.
        return 5;
    }

    /**
     * Pick a split point after an aborted search: among the end points
     * recorded in $V, find those with maximal progress (elements consumed
     * from both ranges) and return the middle one of them.
     *
     * @param int   $M     Length of the "to" range.
     * @param int   $N     Length of the "from" range.
     * @param int   $limit Difference limit the search was run with (also the
     *                     offset of diagonal 0 inside the $V arrays).
     * @param array $V     Scratch arrays left behind by find_middle_snake().
     * @return array Triple (x, y, progress); x and y are relative to the
     *               bottom of the respective range.
     */
    private static function findMostProgress($M, $N, $limit, $V) {
        $delta = $N - $M;

        // Start diagonals are adjusted so they have the parity of $limit.
        if (($M & 1) == ($limit & 1)) {
            $forward_start_diag = max(-$M, -$limit);
        } else {
            $forward_start_diag = max(1 - $M, -$limit);
        }

        $forward_end_diag = min($N, $limit);

        if (($N & 1) == ($limit & 1)) {
            $backward_start_diag = max(-$N, -$limit);
        } else {
            $backward_start_diag = max(1 - $N, -$limit);
        }

        // NOTE(review): find_middle_snake() scans backward diagonals up to
        // +min($M, $d); the leading minus here restricts the backward scan
        // to (at most) the lowest diagonal. Left as is - TODO confirm
        // whether min($M, $limit) was intended.
        $backward_end_diag = -min($M, $limit);

        $temp = array(0, 0, 0);

        // Entry 0 is the current best; further entries collect ties.
        $max_progress = array_fill(0, ceil(max($forward_end_diag - $forward_start_diag,
            $backward_end_diag - $backward_start_diag) / 2), $temp);
        $num_progress = 0;

        // Scan the forward end points.
        for ($k = $forward_start_diag; $k <= $forward_end_diag; $k += 2) {
            $x = $V[0][$limit + $k];
            $y = $x - $k;
            if ($x > $N || $y > $M) {
                continue;
            }

            $progress = $x + $y;
            if ($progress > $max_progress[0][2]) {
                $num_progress = 0;
                $max_progress[0][0] = $x;
                $max_progress[0][1] = $y;
                $max_progress[0][2] = $progress;
            } else if ($progress == $max_progress[0][2]) {
                ++$num_progress;
                $max_progress[$num_progress][0] = $x;
                $max_progress[$num_progress][1] = $y;
                $max_progress[$num_progress][2] = $progress;
            }
        }

        // Stays true until a backward end point strictly beats every forward
        // one; backward ties are only collected after that has happened.
        $max_progress_forward = true;

        // Scan the backward end points.
        for ($k = $backward_start_diag; $k <= $backward_end_diag; $k += 2) {
            $x = $V[1][$limit + $k];
            $y = $x - $k - $delta;
            if ($x < 0 || $y < 0) {
                continue;
            }

            $progress = $N - $x + $M - $y;
            if ($progress > $max_progress[0][2]) {
                $num_progress = 0;
                $max_progress_forward = false;
                $max_progress[0][0] = $x;
                $max_progress[0][1] = $y;
                $max_progress[0][2] = $progress;
            } else if ($progress == $max_progress[0][2] && !$max_progress_forward) {
                ++$num_progress;
                $max_progress[$num_progress][0] = $x;
                $max_progress[$num_progress][1] = $y;
                $max_progress[$num_progress][2] = $progress;
            }
        }

        // Return the middle one of the equally good candidates.
        return $max_progress[floor($num_progress / 2)];
    }

    /**
     * Length of the common subsequence found by the last diff().
     *
     * After a heuristic run the length accumulated during the search is not
     * reliable, so it is recomputed once from the final flags: every element
     * of the "from" sequence that is not marked removed is part of the
     * common subsequence.
     *
     * @return int
     */
    public function getLcsLength() {
        if ($this->heuristicUsed && !$this->lcsLengthCorrectedForHeuristic) {
            $this->lcsLengthCorrectedForHeuristic = true;
            // $this->m is the "from" length, so it must be paired with the
            // "from" flags ($removed), not with $added.
            $this->length = $this->m - array_sum($this->removed);
        }
        return $this->length;
    }

}
00555
/**
 * Plain value holder describing one changed region between two sequences:
 * a half-open index range on the left side and the corresponding half-open
 * index range on the right side, together with their lengths.
 */
class RangeDifference {

    // Left-hand range: first index, index one past the end, element count.
    public $leftstart;
    public $leftend;
    public $leftlength;

    // Right-hand range: first index, index one past the end, element count.
    public $rightstart;
    public $rightend;
    public $rightlength;

    /**
     * @param int $leftstart  First index of the range on the left side.
     * @param int $leftend    Index one past the range on the left side.
     * @param int $rightstart First index of the range on the right side.
     * @param int $rightend   Index one past the range on the right side.
     */
    public function __construct($leftstart, $leftend, $rightstart, $rightend) {
        // Boundaries are stored as given ...
        $this->leftstart = $leftstart;
        $this->rightstart = $rightstart;
        $this->leftend = $leftend;
        $this->rightend = $rightend;

        // ... and the lengths are derived from them.
        $this->leftlength = $this->leftend - $this->leftstart;
        $this->rightlength = $this->rightend - $this->rightstart;
    }
}
00580 }