00001 <?php
00002
/**
 * Plain value holder describing the last common parent of two nodes.
 *
 * The class contains no logic; whoever produces a result fills in the fields
 * and whoever consumes it reads them.
 */
class LastCommonParentResult {

    /** The common parent node; null until a producer assigns it. */
    public $parent = null;

    /** True when a split is required before inserting next to the parent. */
    public $splittingNeeded = false;

    /** Depth of the last common parent; -1 means "not determined". */
    public $lastCommonParentDepth = -1;

    /** Child index inside the last common parent; -1 means "not determined". */
    public $indexInLastCommonParent = -1;
}
00041
/**
 * Describes how a single leaf node differs between the two documents.
 */
class Modification {

    const NONE = 1;
    const REMOVED = 2;
    const ADDED = 4;
    const CHANGED = 8;

    /** One of the class constants above. */
    public $type;

    /** Identifier shared by all nodes of the same modification run; -1 when unset. */
    public $id = -1;

    /** True on the first node that carries a given $id. */
    public $firstOfID = false;

    /** Description of the change; only assigned by callers, null by default. */
    public $changes;

    /**
     * @param int $type One of the Modification::* constants.
     */
    function __construct($type) {
        $this->type = $type;
    }

    /**
     * Map a modification type to its lower-case name.
     *
     * @param int $type One of the Modification::* constants.
     * @return string|null The name, or null when $type matches no constant.
     */
    public static function typeToString($type) {
        $names = array(
            self::NONE => 'none',
            self::REMOVED => 'removed',
            self::ADDED => 'added',
            self::CHANGED => 'changed',
        );
        // Loose comparison on purpose: it mirrors what a switch statement does.
        foreach ($names as $value => $name) {
            if ($type == $value) {
                return $name;
            }
        }
        return null;
    }
}
00070
/**
 * Receives XML parser callbacks (element start/end and character data) and
 * assembles a node tree rooted at a BodyNode. Every leaf that is created
 * (text or image) is additionally recorded, in creation order, in $textNodes.
 */
class DomTreeBuilder {

    /** Pattern used to cut character data into words and single delimiters. */
    const regex = '/([\s\.\,\"\\\'\(\)\?\:\;\!\{\}\-\+\*\=\_\[\]\&\|\$]{1})/';

    /** Matches a piece that is exactly one whitespace character. */
    const whitespace = '/^[\s]{1}$/';

    /** Matches a piece that is exactly one delimiter character. */
    const delimiter = '/^[\s\.\,\"\\\'\(\)\?\:\;\!\{\}\-\+\*\=\_\[\]\&\|\$]{1}$/';

    /** Leaf nodes in the order they were created. */
    public $textNodes = array();

    /** Root of the tree under construction. */
    public $bodyNode;

    /** Node that newly created children get appended to. */
    private $currentParent;

    /** Characters of the word that is currently being accumulated. */
    private $newWord = '';

    /** Not used by any method of this class. */
    protected $bodyStarted = false;

    /** Not used by any method of this class. */
    protected $bodyEnded = false;

    /** True when whitespace was seen since the last leaf was created. */
    private $whiteSpaceBeforeThis = false;

    /** Most recent node that can receive a "whitespace after" marker. */
    private $lastSibling;

    /** False while inside a <pre> element, where whitespace is kept as text. */
    private $notInPre = true;

    function __construct() {
        $root = new BodyNode();
        $this->currentParent = $root;
        $this->bodyNode = $root;
        $this->lastSibling = new DummyNode();
    }

    /**
     * Flush the pending word and log how many leaves were collected.
     */
    public function endDocument() {
        $this->endWord();
        $total = count($this->textNodes);
        HTMLDiffer::diffDebug( $total . " text nodes in document.\n" );
    }

    /**
     * Element-start callback. The body element itself is ignored; any other
     * element becomes a TagNode child of the current parent and then becomes
     * the current parent.
     *
     * @param mixed  $parser     Parser handle supplied by the XML parser (unused).
     * @param string $name       Element name.
     * @param array  $attributes Element attributes.
     */
    public function startElement($parser, $name, $attributes) {
        if (strcasecmp($name, 'body') == 0) {
            return;
        }

        HTMLDiffer::diffDebug( "Starting $name node.\n" );
        $this->endWord();

        $element = new TagNode($this->currentParent, $name, $attributes);
        $this->currentParent->children[] = $element;
        $this->currentParent = $element;
        $this->lastSibling = new DummyNode();

        // Only non-block elements remember the whitespace that preceded them.
        if ($this->whiteSpaceBeforeThis && !in_array(strtolower($element->qName), TagNode::$blocks)) {
            $element->whiteBefore = true;
        }
        $this->whiteSpaceBeforeThis = false;

        if (strcasecmp($name, 'pre') == 0) {
            $this->notInPre = false;
        }
    }

    /**
     * Element-end callback. Closing the body ends the document; closing any
     * other element flushes the pending word and moves back up to its parent.
     *
     * @param mixed  $parser Parser handle supplied by the XML parser (unused).
     * @param string $name   Element name.
     */
    public function endElement($parser, $name) {
        if (strcasecmp($name, 'body') == 0) {
            $this->endDocument();
            return;
        }

        HTMLDiffer::diffDebug( "Ending $name node.\n");

        if (strcasecmp($name, 'img') == 0) {
            // An image is represented as a leaf inside its own tag node. The
            // pending-whitespace flag is deliberately left untouched here.
            $image = new ImageNode($this->currentParent, $this->currentParent->attributes);
            $this->attachLeaf($image);
        }

        $this->endWord();

        $closing = $this->currentParent;
        if (in_array(strtolower($closing->qName), TagNode::$blocks)) {
            $this->lastSibling = new DummyNode();
        } else {
            $this->lastSibling = $closing;
        }
        $this->currentParent = $closing->parent;
        $this->whiteSpaceBeforeThis = false;

        if (!$this->notInPre && strcasecmp($name, 'pre') == 0) {
            $this->notInPre = true;
        }
    }

    /**
     * Character-data callback. Splits the data on delimiters; whitespace
     * outside <pre> only sets whitespace markers, every other delimiter becomes
     * its own text node, and anything else is appended to the pending word.
     *
     * @param mixed  $parser Parser handle supplied by the XML parser (unused).
     * @param string $data   Raw character data.
     */
    public function characters($parser, $data) {
        $pieces = preg_split(self::regex, $data, -1, PREG_SPLIT_DELIM_CAPTURE);

        foreach ($pieces as $piece) {
            if (preg_match(self::whitespace, $piece) && $this->notInPre) {
                $this->endWord();
                $this->lastSibling->whiteAfter = true;
                $this->whiteSpaceBeforeThis = true;
                continue;
            }

            if (preg_match(self::delimiter, $piece)) {
                $this->endWord();
                $this->attachLeaf(new TextNode($this->currentParent, $piece));
                $this->whiteSpaceBeforeThis = false;
                continue;
            }

            $this->newWord .= $piece;
        }
    }

    /**
     * Turn the pending word, if there is one, into a text node.
     */
    private function endWord() {
        if ($this->newWord === '') {
            return;
        }
        $this->attachLeaf(new TextNode($this->currentParent, $this->newWord));
        $this->whiteSpaceBeforeThis = false;
        $this->newWord = "";
    }

    /**
     * Append a leaf to the current parent, stamp it with the pending
     * whitespace flag and record it as the last sibling and as a text node.
     * The caller decides whether the whitespace flag is reset afterwards.
     */
    private function attachLeaf($leaf) {
        $this->currentParent->children[] = $leaf;
        $leaf->whiteBefore = $this->whiteSpaceBeforeThis;
        $this->lastSibling = $leaf;
        $this->textNodes[] = $leaf;
    }

    /**
     * @return array Result of TextNode::toDiffLine() for each collected leaf.
     */
    public function getDiffLines() {
        return array_map(array('TextNode', 'toDiffLine'), $this->textNodes);
    }
}
00192
/**
 * Annotates the leaf nodes of a new document tree with Modification objects
 * (added / changed) and copies removed nodes of the old tree into the new
 * tree so both can be rendered together.
 */
class TextNodeDiffer {

    /** Leaf nodes of the new document. */
    private $textNodes;

    /** Root of the new document; removed nodes get inserted below it. */
    public $bodyNode;

    /** Leaf nodes of the old document. */
    private $oldTextNodes;

    /** Root of the old document (stored but not read by this class). */
    private $oldBodyNode;

    /** Id handed to the next run of added nodes. */
    private $newID = 0;

    /** Id handed to the current run of changed nodes. */
    private $changedID = 0;

    /** True once $changedID has been assigned to at least one node. */
    private $changedIDUsed = false;

    /** Change description of the most recently marked changed node. */
    private $changes;

    /** True when the leaf before the last deleted part had whitespace after it. */
    private $whiteAfterLastChangedPart = false;

    /** Id handed to the next run of removed nodes. */
    private $deletedID = 0;

    /**
     * @param DomTreeBuilder $tree    Builder holding the new document.
     * @param DomTreeBuilder $oldTree Builder holding the old document.
     */
    function __construct(DomTreeBuilder $tree, DomTreeBuilder $oldTree) {
        $this->textNodes = $tree->textNodes;
        $this->bodyNode = $tree->bodyNode;
        $this->oldTextNodes = $oldTree->textNodes;
        $this->oldBodyNode = $oldTree->bodyNode;
    }

    /**
     * Mark the new leaves in [$start, $end) as added, all sharing one id.
     *
     * @param int $start First index (inclusive) into the new leaves.
     * @param int $end   Last index (exclusive) into the new leaves.
     */
    public function markAsNew($start, $end) {
        if ($end <= $start) {
            return;
        }

        if ($this->whiteAfterLastChangedPart) {
            $this->textNodes[$start]->whiteBefore = false;
        }

        for ($i = $start; $i < $end; ++$i) {
            $mod = new Modification(Modification::ADDED);
            $mod->id = $this->newID;
            $this->textNodes[$i]->modification = $mod;
        }
        // The range is known to be non-empty here, so $start always exists.
        $this->textNodes[$start]->modification->firstOfID = true;

        ++$this->newID;
    }

    /**
     * Walk a stretch of leaves whose text is unchanged and mark those whose
     * ancestor chain differs between the old and the new document as changed.
     *
     * @param int $leftstart  First index into the old leaves.
     * @param int $leftend    End index into the old leaves (not used).
     * @param int $rightstart First index into the new leaves.
     * @param int $rightend   End index (exclusive) into the new leaves.
     */
    public function handlePossibleChangedPart($leftstart, $leftend, $rightstart, $rightend) {
        $i = $rightstart;
        $j = $leftstart;

        if ($this->changedIDUsed) {
            ++$this->changedID;
            $this->changedIDUsed = false;
        }

        while ($i < $rightend) {
            $acthis = new AncestorComparator($this->textNodes[$i]->getParentTree());
            $acother = new AncestorComparator($this->oldTextNodes[$j]->getParentTree());
            $result = $acthis->getResult($acother);
            unset($acthis, $acother);

            if ( $result ) {
                $mod = new Modification(Modification::CHANGED);

                if (!$this->changedIDUsed) {
                    $mod->firstOfID = true;
                } else if ($result !== $this->changes) {
                    // A different description starts a new run of changes.
                    ++$this->changedID;
                    $mod->firstOfID = true;
                }

                $mod->changes = $result;
                $mod->id = $this->changedID;

                $this->textNodes[$i]->modification = $mod;
                $this->changes = $result;
                $this->changedIDUsed = true;
            } else if ($this->changedIDUsed) {
                ++$this->changedID;
                $this->changedIDUsed = false;
            }
            ++$i;
            ++$j;
        }
    }

    /**
     * Mark the old leaves in [$start, $end) as removed and copy the minimal
     * set of removed nodes into the new tree, next to the new leaf at
     * position $before.
     *
     * @param int $start  First index (inclusive) into the old leaves.
     * @param int $end    Last index (exclusive) into the old leaves.
     * @param int $before Index into the new leaves in front of which the
     *                    removed content belongs.
     */
    public function markAsDeleted($start, $end, $before) {
        if ($end <= $start) {
            return;
        }

        $this->whiteAfterLastChangedPart =
            ($before > 0 && $this->textNodes[$before - 1]->whiteAfter) ? true : false;

        for ($i = $start; $i < $end; ++$i) {
            $mod = new Modification(Modification::REMOVED);
            $mod->id = $this->deletedID;
            $this->oldTextNodes[$i]->modification = $mod;
        }
        $this->oldTextNodes[$start]->modification->firstOfID = true;

        $root = $this->oldTextNodes[$start]->getLastCommonParent($this->oldTextNodes[$end - 1])->parent;

        $junk1 = $junk2 = null;
        $deletedNodes = $root->getMinimalDeletedSet($this->deletedID, $junk1, $junk2);

        HTMLDiffer::diffDebug( "Minimal set of deleted nodes of size " . count($deletedNodes) . "\n" );

        // New-document leaves directly before and after the insertion point.
        // Either may stay unset at the edges of the document.
        if ($before > 0) {
            $prevLeaf = $this->textNodes[$before - 1];
        }
        if ($before < count($this->textNodes)) {
            $nextLeaf = $this->textNodes[$before];
        }

        while (count($deletedNodes) > 0) {
            $lastIndex = count($deletedNodes) - 1;

            if (isset($prevLeaf)) {
                $prevResult = $prevLeaf->getLastCommonParent($deletedNodes[0]);
            } else {
                $prevResult = new LastCommonParentResult();
                $prevResult->parent = $this->bodyNode;
                $prevResult->indexInLastCommonParent = -1;
            }

            // Was isset($nextleaf): variable names are case-sensitive, so the
            // test could never succeed and the fallback was always taken.
            if (isset($nextLeaf)) {
                $nextResult = $nextLeaf->getLastCommonParent($deletedNodes[$lastIndex]);
            } else {
                $nextResult = new LastCommonParentResult();
                $nextResult->parent = $this->bodyNode;
                $nextResult->indexInLastCommonParent = $this->bodyNode->getNbChildren();
            }

            if ($prevResult->lastCommonParentDepth == $nextResult->lastCommonParentDepth) {
                // Break the tie so that exactly one of the branches below runs.
                if ($deletedNodes[0]->parent === $deletedNodes[$lastIndex]->parent
                    && $prevResult->parent === $nextResult->parent) {
                    $prevResult->lastCommonParentDepth = $prevResult->lastCommonParentDepth + 1;
                } else {
                    $distancePrev = $deletedNodes[0]->parent->getMatchRatio($prevResult->parent);
                    $distanceNext = $deletedNodes[$lastIndex]->parent->getMatchRatio($nextResult->parent);

                    if ($distancePrev <= $distanceNext) {
                        $prevResult->lastCommonParentDepth = $prevResult->lastCommonParentDepth + 1;
                    } else {
                        $nextResult->lastCommonParentDepth = $nextResult->lastCommonParentDepth + 1;
                    }
                }
            }

            if ($prevResult->lastCommonParentDepth > $nextResult->lastCommonParentDepth) {
                // Attach the first removed node right after the previous leaf.
                if ($prevResult->splittingNeeded) {
                    $prevLeaf->parent->splitUntil($prevResult->parent, $prevLeaf, true);
                }
                $prevLeaf = $deletedNodes[0]->copyTree();
                array_shift($deletedNodes);
                $prevLeaf->setParent($prevResult->parent);
                $prevResult->parent->addChildAbsolute($prevLeaf, $prevResult->indexInLastCommonParent + 1);
            } else if ($prevResult->lastCommonParentDepth < $nextResult->lastCommonParentDepth) {
                // Attach the last removed node right before the next leaf.
                if ($nextResult->splittingNeeded) {
                    $splitOccured = $nextLeaf->parent->splitUntil($nextResult->parent, $nextLeaf, false);
                    if ($splitOccured) {
                        // The split shifted the insertion position by one.
                        $nextResult->indexInLastCommonParent = $nextResult->indexInLastCommonParent + 1;
                    }
                }
                // Was count(deletedNodes) without the "$", i.e. an undefined
                // constant instead of the array.
                $nextLeaf = $deletedNodes[$lastIndex]->copyTree();
                array_pop($deletedNodes);
                $nextLeaf->setParent($nextResult->parent);
                $nextResult->parent->addChildAbsolute($nextLeaf, $nextResult->indexInLastCommonParent);
            }
        }
        ++$this->deletedID;
    }

    /**
     * Delegate whitespace expansion to the body node of the new document.
     */
    public function expandWhiteSpace() {
        $this->bodyNode->expandWhiteSpace();
    }

    /**
     * @return int Number of leaves in the new document.
     */
    public function lengthNew() {
        return count($this->textNodes);
    }

    /**
     * @return int Number of leaves in the old document.
     */
    public function lengthOld() {
        return count($this->oldTextNodes);
    }
}
00399
/**
 * Entry point of the HTML diff: parses two HTML fragments, diffs their leaf
 * nodes and renders the annotated result through the output handler.
 */
class HTMLDiffer {

    /** Handler that HTMLOutput writes the rendered diff to. */
    private $output;

    /** Debug text accumulated through diffDebug(). */
    private static $debug = '';

    /**
     * @param mixed $output Handler handed on to HTMLOutput.
     */
    function __construct($output) {
        $this->output = $output;
    }

    /**
     * Diff two HTML fragments and send the rendered result to the output
     * handler. Nothing is returned.
     *
     * @param string $from Old HTML.
     * @param string $to   New HTML.
     */
    function htmlDiff($from, $to) {
        wfProfileIn( __METHOD__ );

        $domfrom = $this->buildTree($from);
        unset($from);

        $domto = $this->buildTree($to);
        unset($to);

        $diffengine = new WikiDiff3();
        $rawRanges = $diffengine->diff_range($domfrom->getDiffLines(), $domto->getDiffLines());
        $differences = $this->preProcess($rawRanges);
        unset($diffengine, $rawRanges);

        $domdiffer = new TextNodeDiffer($domto, $domfrom);

        $leftPos = 0;
        $rightPos = 0;
        foreach ($differences as $range) {
            // Leaves between two differences have equal text but may still
            // sit under different ancestors.
            if ($range->leftstart > $leftPos) {
                $domdiffer->handlePossibleChangedPart($leftPos, $range->leftstart,
                    $rightPos, $range->rightstart);
            }
            if ($range->leftlength > 0) {
                $domdiffer->markAsDeleted($range->leftstart, $range->leftend, $range->rightstart);
            }
            $domdiffer->markAsNew($range->rightstart, $range->rightend);

            $leftPos = $range->leftend;
            $rightPos = $range->rightend;
        }

        $oldLength = $domdiffer->lengthOld();
        if ($leftPos < $oldLength) {
            $domdiffer->handlePossibleChangedPart($leftPos, $oldLength, $rightPos, $domdiffer->lengthNew());
        }

        $domdiffer->expandWhiteSpace();
        $renderer = new HTMLOutput('htmldiff', $this->output);
        $renderer->parse($domdiffer->bodyNode);
        wfProfileOut( __METHOD__ );
    }

    /**
     * Parse one HTML fragment, wrapped in an XML prologue and a body element,
     * into a fresh DomTreeBuilder. Parse errors are logged, not thrown.
     *
     * @param string $html HTML fragment.
     * @return DomTreeBuilder The builder that received the parser callbacks.
     */
    private function buildTree($html) {
        $parser = xml_parser_create('');
        $builder = new DomTreeBuilder();

        xml_set_element_handler($parser, array($builder, "startElement"), array($builder, "endElement"));
        xml_set_character_data_handler($parser, array($builder, "characters"));

        HTMLDiffer::diffDebug( "Parsing " . strlen($html) . " characters worth of HTML\n" );

        $parsed = xml_parse($parser, '<?xml version="1.0" encoding="UTF-8"?>'.Sanitizer::hackDocType().'<body>', false)
            && xml_parse($parser, $html, false)
            && xml_parse($parser, '</body>', true);
        if (!$parsed) {
            $error = xml_error_string(xml_get_error_code($parser));
            $line = xml_get_current_line_number($parser);
            HTMLDiffer::diffDebug( "XML error: $error at line $line\n" );
        }
        xml_parser_free($parser);

        return $builder;
    }

    /**
     * Merge neighbouring differences whose score() exceeds the number of
     * unchanged old leaves between them.
     *
     * @param array $differences Range differences from the diff engine.
     * @return array List of RangeDifference objects.
     */
    private function preProcess( $differences) {
        $merged = array();
        $total = count($differences);

        for ($i = 0; $i < $total; ++$i) {
            $current = $differences[$i];
            $leftStart = $current->leftstart;
            $leftEnd = $current->leftend;
            $rightStart = $current->rightstart;
            $rightEnd = $current->rightend;

            $leftLength = $leftEnd - $leftStart;
            $rightLength = $rightEnd - $rightStart;

            while ($i + 1 < $total) {
                $next = $differences[$i + 1];
                $score = self::score($leftLength, $next->leftlength, $rightLength, $next->rightlength);
                if (!($score > ($next->leftstart - $leftEnd))) {
                    break;
                }
                $leftEnd = $next->leftend;
                $rightEnd = $next->rightend;
                $leftLength = $leftEnd - $leftStart;
                $rightLength = $rightEnd - $rightStart;
                ++$i;
            }
            $merged[] = new RangeDifference($leftStart, $leftEnd, $rightStart, $rightEnd);
        }
        return $merged;
    }

    /**
     * Score how strongly two neighbouring differences should be merged.
     *
     * @param int $ll  Old-side length of the first difference.
     * @param int $nll Old-side length of the next difference.
     * @param int $rl  New-side length of the first difference.
     * @param int $nrl New-side length of the next difference.
     * @return int|float 0 when one side is empty in both differences,
     *                   otherwise the damped sum of the lengths divided by 6.
     */
    public static function score($ll, $nll, $rl, $nrl) {
        $leftEmpty = ($ll == 0 && $nll == 0);
        $rightEmpty = ($rl == 0 && $nrl == 0);
        if ($leftEmpty || $rightEmpty) {
            return 0;
        }

        $lengths = array($ll, $nll, $rl, $nrl);
        $sum = 0;
        foreach ($lengths as $length) {
            // Everything above 3 counts only half, applied repeatedly.
            while ($length > 3) {
                $sum += 3;
                $length -= 3;
                $length *= 0.5;
            }
            $sum += $length;
        }
        return $sum / (1.5 * count($lengths));
    }

    /**
     * Append text to the shared debug log.
     *
     * @param string $str Text to append.
     */
    public static function diffDebug( $str ) {
        self::$debug = self::$debug . $str;
    }

    /**
     * @return string Everything passed to diffDebug() so far.
     */
    public static function getDebugOutput() {
        return self::$debug;
    }

}
00553
/**
 * Compares two tag nodes by looking only at the text leaves below them.
 */
class TextOnlyComparator {

    /** Diff lines of all text leaves found below the tree, in document order. */
    public $leafs = array();

    /**
     * Collect the text leaves of $tree and convert them to diff lines.
     *
     * This method used to be spelled "_construct" (one underscore), so PHP
     * never ran it and $leafs always stayed empty.
     *
     * @param TagNode $tree Root of the subtree to index.
     */
    function __construct(TagNode $tree) {
        $this->addRecursive($tree);
        $this->leafs = array_map(array('TextNode', 'toDiffLine'), $this->leafs);
    }

    /**
     * Depth-first walk that appends every TextNode below $tree to $leafs.
     */
    private function addRecursive(TagNode $tree) {
        foreach ($tree->children as $child) {
            if ($child instanceof TagNode) {
                $this->addRecursive($child);
            } else if ($child instanceof TextNode) {
                // Was "$node", an undefined variable, which appended null.
                $this->leafs[] = $child;
            }
        }
    }

    /**
     * Distance between the text of this comparator and another one.
     *
     * @param TextOnlyComparator $other Comparator to measure against.
     * @return float INF when either side has no leaves; otherwise
     *               (2 - lcs/|other| - lcs/|this|) / 2, where lcs is the value
     *               reported by the diff engine's getLcsLength().
     */
    public function getMatchRatio(TextOnlyComparator $other) {
        $nbOthers = count($other->leafs);
        $nbThis = count($this->leafs);
        if ($nbOthers == 0 || $nbThis == 0) {
            // Same value the former "-log(0)" produced, stated directly.
            return INF;
        }

        $diffengine = new WikiDiff3(25000, 1.35);
        $diffengine->diff($this->leafs, $other->leafs);

        $lcsLength = $diffengine->getLcsLength();

        return (2.0 - $lcsLength / $nbOthers - $lcsLength / $nbThis) / 2.0;
    }
}
00590
/**
 * Compares the ancestor chains of two leaf nodes.
 */
class AncestorComparator {

    /** The ancestor nodes as passed to the constructor. */
    public $ancestors;

    /** Result of TagNode::toDiffLine() for each ancestor. */
    public $ancestorsText;

    /** Not written by this class; empty string by default. */
    public $compareTxt = "";

    /**
     * @param array $ancestors Ancestor nodes of a leaf.
     */
    function __construct( $ancestors) {
        $this->ancestors = $ancestors;
        $this->ancestorsText = array_map(array('TagNode', 'toDiffLine'), $ancestors);
    }

    /**
     * Describe how this ancestor chain differs from another one.
     *
     * @param AncestorComparator $other Chain to compare against (treated as
     *                                  the left-hand side of the diff).
     * @return string|null Description text, or null when the diff engine
     *                     reports no differences.
     */
    public function getResult(AncestorComparator $other) {
        $engine = new WikiDiff3(10000, 1.35);
        $ranges = $engine->diff_range($other->ancestorsText, $this->ancestorsText);

        if (count($ranges) == 0) {
            return null;
        }

        $generator = new ChangeTextGenerator($this, $other);
        $changeText = $generator->getChanged($ranges);
        return $changeText->toString();
    }
}
00619
/**
 * Turns the differences between two ancestor chains into descriptive HTML.
 */
class ChangeTextGenerator {

    /** Comparator for the new side; its ancestors are described as added. */
    private $ancestorComparator;

    /** Comparator for the old side; its ancestors are described as removed. */
    private $other;

    /** Factory producing the per-tag describers. */
    private $factory;

    function __construct(AncestorComparator $ancestorComparator, AncestorComparator $other) {
        $this->ancestorComparator = $ancestorComparator;
        $this->other = $other;
        $this->factory = new TagToStringFactory();
    }

    /**
     * Build the description for a list of range differences.
     *
     * More than one difference is wrapped in an outer list; a difference that
     * covers more than one ancestor in total gets an inner list of its own.
     *
     * @param array $differences Range differences between the two chains.
     * @return ChangeText The accumulated HTML.
     */
    public function getChanged( $differences) {
        $txt = new ChangeText;
        $total = count($differences);

        $outerList = false;
        if ($total > 1) {
            $txt->addHtml('<ul class="changelist">');
            $outerList = true;
        }

        for ($index = 0; $index < $total; ++$index) {
            $range = $differences[$index];

            if ($outerList) {
                $txt->addHtml('<li>');
            }

            $innerList = false;
            if ($range->leftlength + $range->rightlength > 1) {
                $txt->addHtml('<ul class="changelist">');
                $innerList = true;
            }

            // Ancestors that exist only on the old side.
            for ($pos = $range->leftstart; $pos < $range->leftend; ++$pos) {
                if ($innerList) {
                    $txt->addHtml('<li>');
                }
                $this->addTagOld($txt, $this->other->ancestors[$pos]);
                if ($innerList) {
                    $txt->addHtml('</li>');
                }
            }

            // Ancestors that exist only on the new side.
            for ($pos = $range->rightstart; $pos < $range->rightend; ++$pos) {
                if ($innerList) {
                    $txt->addHtml('<li>');
                }
                $this->addTagNew($txt, $this->ancestorComparator->ancestors[$pos]);
                if ($innerList) {
                    $txt->addHtml('</li>');
                }
            }

            if ($innerList) {
                $txt->addHtml('</ul>');
            }
            if ($outerList) {
                $txt->addHtml('</li>');
            }
        }

        if ($outerList) {
            $txt->addHtml('</ul>');
        }
        return $txt;
    }

    /** Append the "removed" description of one ancestor. */
    private function addTagOld(ChangeText $txt, TagNode $ancestor) {
        $describer = $this->factory->create($ancestor);
        $describer->getRemovedDescription($txt);
    }

    /** Append the "added" description of one ancestor. */
    private function addTagNew(ChangeText $txt, TagNode $ancestor) {
        $describer = $this->factory->create($ancestor);
        $describer->getAddedDescription($txt);
    }
}
00695
/**
 * Minimal append-only buffer for HTML fragments.
 */
class ChangeText {

    /** Everything appended so far. */
    private $txt = "";

    /**
     * Append a fragment to the buffer.
     *
     * @param string $s HTML fragment; it is stored as given.
     */
    public function addHtml($s) {
        $this->txt = $this->txt . $s;
    }

    /**
     * @return string The concatenation of all appended fragments.
     */
    public function toString() {
        $html = $this->txt;
        return $html;
    }
}
00708
/**
 * Chooses the describer class for a tag and classifies the tag as a
 * container, a style tag, or unknown.
 */
class TagToStringFactory {

    const MOVED = 1;
    const STYLE = 2;
    const UNKNOWN = 4;

    /** Tags classified as MOVED. */
    private static $containerTags = array('html', 'body', 'p', 'blockquote',
        'h1', 'h2', 'h3', 'h4', 'h5', 'pre', 'div', 'ul', 'ol', 'li',
        'table', 'tbody', 'tr', 'td', 'th', 'br', 'hr', 'code', 'dl',
        'dt', 'dd', 'input', 'form', 'img', 'span', 'a');

    /** Tags classified as STYLE. */
    private static $styleTags = array('i', 'b', 'strong', 'em', 'font',
        'big', 'del', 'tt', 'sub', 'sup', 'strike');

    /**
     * Create the describer for a tag node.
     *
     * @param TagNode $node Tag to describe.
     * @return TagToString AnchorToString for "a", NoContentTagToString for
     *                     "img", a plain TagToString otherwise.
     */
    public function create(TagNode $node) {
        $semantic = $this->getChangeSemantic($node->qName);

        $isAnchor = (strcasecmp($node->qName, 'a') == 0);
        if ($isAnchor) {
            return new AnchorToString($node, $semantic);
        }

        $isImage = (strcasecmp($node->qName, 'img') == 0);
        if ($isImage) {
            return new NoContentTagToString($node, $semantic);
        }

        return new TagToString($node, $semantic);
    }

    /**
     * Classify a tag name, ignoring case.
     *
     * @param string $qname Tag name.
     * @return int self::MOVED, self::STYLE or self::UNKNOWN.
     */
    protected function getChangeSemantic($qname) {
        $tag = strtolower($qname);
        if (in_array($tag, self::$containerTags, true)) {
            return self::MOVED;
        }
        if (in_array($tag, self::$styleTags, true)) {
            return self::STYLE;
        }
        return self::UNKNOWN;
    }
}
00744
/**
 * Writes a human-readable description of a tag that was added to or removed
 * from a leaf's ancestor chain.
 */
class TagToString {

    /** The tag being described. */
    protected $node;

    /** One of TagToStringFactory::MOVED / STYLE / UNKNOWN. */
    protected $sem;

    /**
     * @param TagNode $node Tag to describe.
     * @param int     $sem  Change semantic chosen by TagToStringFactory.
     */
    function __construct(TagNode $node, $sem) {
        $this->node = $node;
        $this->sem = $sem;
    }

    /**
     * Append the description of this tag having been removed.
     *
     * @param ChangeText $txt Buffer to append to.
     */
    public function getRemovedDescription(ChangeText $txt) {
        if ($this->sem == TagToStringFactory::MOVED) {
            $messageKey = 'diff-movedoutof';
        } else if ($this->sem == TagToStringFactory::STYLE) {
            $messageKey = 'diff-styleremoved';
        } else {
            $messageKey = 'diff-removed';
        }
        $this->describe($txt, $messageKey);
    }

    /**
     * Append the description of this tag having been added.
     *
     * @param ChangeText $txt Buffer to append to.
     */
    public function getAddedDescription(ChangeText $txt) {
        if ($this->sem == TagToStringFactory::MOVED) {
            $messageKey = 'diff-movedto';
        } else if ($this->sem == TagToStringFactory::STYLE) {
            $messageKey = 'diff-styleadded';
        } else {
            $messageKey = 'diff-added';
        }
        $this->describe($txt, $messageKey);
    }

    /**
     * Shared body of the two description methods: tag name lookup, the
     * sentence for $messageKey, the attribute list and a closing full stop.
     */
    private function describe(ChangeText $txt, $messageKey) {
        $lookupKey = 'diff-' . $this->node->qName;
        $tagDescription = wfMsgExt($lookupKey, 'parseinline' );
        if ( wfEmptyMsg( $lookupKey, $tagDescription ) ) {
            // No message for this tag: fall back to the literal tag.
            $tagDescription = "<" . $this->node->qName . ">";
        }
        $txt->addHtml( wfMsgExt( $messageKey, 'parseinline', $tagDescription ) );
        $this->addAttributes($txt, $this->node->attributes);
        $txt->addHtml('.');
    }

    /**
     * Append the attribute list: the first attribute with 'diff-with', the
     * middle ones with a separator and 'diff-with-additional', and, when
     * there are at least two, the last one with 'diff-with-final'.
     *
     * Earlier a tag with exactly one attribute produced no output at all,
     * because the first attribute was only emitted from a loop that skips the
     * last element.
     *
     * @param ChangeText $txt        Buffer to append to.
     * @param array      $attributes Attribute name => value.
     */
    protected function addAttributes(ChangeText $txt, array $attributes) {
        $keys = array_keys($attributes);
        $lastIndex = count($keys) - 1;
        if ($lastIndex < 0) {
            return;
        }

        $firstKey = $keys[0];
        $txt->addHtml( wfMsgExt( 'diff-with', 'escapenoentities',
            $this->translateArgument( $firstKey ), htmlspecialchars( $attributes[$firstKey] ) ) );

        for ($i = 1; $i < $lastIndex; $i++) {
            $key = $keys[$i];
            $txt->addHtml( wfMsgExt( 'comma-separator', 'escapenoentities' ) .
                wfMsgExt( 'diff-with-additional', 'escapenoentities',
                    $this->translateArgument( $key ), htmlspecialchars( $attributes[$key] ) )
            );
        }

        if ($lastIndex > 0) {
            $lastKey = $keys[$lastIndex];
            $txt->addHtml( wfMsgExt( 'diff-with-final', 'escapenoentities',
                $this->translateArgument( $lastKey ), htmlspecialchars( $attributes[$lastKey] ) ) );
        }
    }

    /**
     * Look up the display name of an attribute, falling back to "<name>".
     *
     * @param string $name Attribute name.
     * @return string HTML-escaped display name.
     */
    protected function translateArgument($name) {
        $translation = wfMsgExt('diff-' . $name, 'parseinline' );
        if ( wfEmptyMsg( 'diff-' . $name, $translation ) ) {
            $translation = "<" . $name . ">";
        }
        return htmlspecialchars( $translation );
    }
}
00824
/**
 * Describer for tags without content (the factory uses it for "img"): the
 * tag is reported as "changed to" / "changed from" instead of added/removed.
 */
class NoContentTagToString extends TagToString {

    function __construct(TagNode $node, $sem) {
        parent::__construct($node, $sem);
    }

    /**
     * Append the "changed to" description of this tag.
     *
     * @param ChangeText $txt Buffer to append to.
     */
    public function getAddedDescription(ChangeText $txt) {
        $this->writeChange($txt, 'diff-changedto');
    }

    /**
     * Append the "changed from" description of this tag.
     *
     * @param ChangeText $txt Buffer to append to.
     */
    public function getRemovedDescription(ChangeText $txt) {
        $this->writeChange($txt, 'diff-changedfrom');
    }

    /**
     * Common body of both descriptions: tag name lookup with a literal
     * "<tag>" fallback, the sentence for $messageKey, the attribute list and
     * a closing full stop.
     */
    private function writeChange(ChangeText $txt, $messageKey) {
        $tagName = $this->node->qName;
        $tagDescription = wfMsgExt('diff-' . $tagName, 'parseinline' );
        if ( wfEmptyMsg( 'diff-' . $tagName, $tagDescription ) ) {
            $tagDescription = "<" . $tagName . ">";
        }
        $sentence = wfMsgExt($messageKey, 'parseinline', $tagDescription );
        $txt->addHtml( $sentence );
        $this->addAttributes($txt, $this->node->attributes);
        $txt->addHtml('.');
    }
}
00851
/**
 * Describer for anchors: the href attribute is reported as the link
 * destination, all remaining attributes are listed the regular way.
 */
class AnchorToString extends TagToString {

    function __construct(TagNode $node, $sem) {
        parent::__construct($node, $sem);
    }

    /**
     * Append the destination (when an href is present) followed by the
     * regular attribute list without the href.
     *
     * @param ChangeText $txt        Buffer to append to.
     * @param array      $attributes Attribute name => value.
     */
    protected function addAttributes(ChangeText $txt, array $attributes) {
        if (array_key_exists('href', $attributes)) {
            $destination = htmlspecialchars($attributes['href']);
            $sentence = wfMsgExt( 'diff-withdestination', 'parseinline', $destination );
            $txt->addHtml(' ' . $sentence );
            // Local copy only; keeps href out of the generic list below.
            unset($attributes['href']);
        }
        parent::addAttributes($txt, $attributes);
    }
}
00866
/**
 * Serialises an annotated node tree through a content handler, wrapping runs
 * of added, changed and removed leaves in marker spans.
 */
class HTMLOutput {

    /** Prefix used inside the generated span ids. */
    private $prefix;

    /** Content handler receiving startElement/endElement/characters/html calls. */
    private $handler;

    /**
     * @param string $prefix  Prefix for generated ids.
     * @param mixed  $handler Content handler to write to.
     */
    function __construct($prefix, $handler) {
        $this->prefix = $prefix;
        $this->handler = $handler;
    }

    /**
     * Emit $node and everything below it. The tags "img" and "body" are not
     * emitted themselves; only their children are.
     *
     * @param TagNode $node Subtree to emit.
     */
    public function parse(TagNode $node) {
        $out = $this->handler;

        $emitOwnTag = strcasecmp($node->qName, 'img') != 0
            && strcasecmp($node->qName, 'body') != 0;
        if ($emitOwnTag) {
            $out->startElement($node->qName, $node->attributes);
        }

        // Which kind of marker span is currently open.
        $addedOpen = false;
        $removedOpen = false;
        $changedOpen = false;
        // Description of the changed-span that is currently open.
        $openChangeText = '';

        foreach ($node->children as $child) {
            if ($child instanceof TagNode) {
                // Marker spans never enclose a nested tag: close, then recurse.
                if ($addedOpen) {
                    $out->endElement('span');
                    $addedOpen = false;
                } else if ($changedOpen) {
                    $out->endElement('span');
                    $changedOpen = false;
                } else if ($removedOpen) {
                    $out->endElement('span');
                    $removedOpen = false;
                }
                $this->parse($child);
                continue;
            }

            if (!($child instanceof TextNode)) {
                continue;
            }

            $mod = $child->modification;

            // Close the open span when this leaf does not continue its run.
            if ($addedOpen && ($mod->type != Modification::ADDED || $mod->firstOfID)) {
                $out->endElement('span');
                $addedOpen = false;
            } else if ($changedOpen && ($mod->type != Modification::CHANGED
                    || $mod->changes != $openChangeText || $mod->firstOfID)) {
                $out->endElement('span');
                $changedOpen = false;
            } else if ($removedOpen && ($mod->type != Modification::REMOVED || $mod->firstOfID)) {
                $out->endElement('span');
                $removedOpen = false;
            }

            // Open a span when this leaf starts a run.
            if (!$addedOpen && $mod->type == Modification::ADDED) {
                $out->startElement('span', $this->markerAttributes('diff-html-added', 'added', $mod));
                $addedOpen = true;
            } else if (!$changedOpen && $mod->type == Modification::CHANGED) {
                $out->startElement('span', $this->markerAttributes('diff-html-changed', 'changed', $mod));

                // The description travels inside the span as a nested tip span.
                $out->startElement('span', array('class' => 'tip'));
                $out->html($mod->changes);
                $out->endElement('span');

                $changedOpen = true;
                $openChangeText = $mod->changes;
            } else if (!$removedOpen && $mod->type == Modification::REMOVED) {
                $out->startElement('span', $this->markerAttributes('diff-html-removed', 'removed', $mod));
                $removedOpen = true;
            }

            $chars = $child->text;

            if ($child instanceof ImageNode) {
                $this->writeImage($child);
            } else {
                $out->characters($chars);
            }
        }

        if ($addedOpen) {
            $out->endElement('span');
            $addedOpen = false;
        } else if ($changedOpen) {
            $out->endElement('span');
            $changedOpen = false;
        } else if ($removedOpen) {
            $out->endElement('span');
            $removedOpen = false;
        }

        if ($emitOwnTag) {
            $out->endElement($node->qName);
        }
    }

    /**
     * Attributes of a marker span: always the CSS class, plus an id of the
     * form "<kind>-<prefix>-<id>" on the first leaf of a modification.
     */
    private function markerAttributes($cssClass, $kind, $mod) {
        $attrs = array('class' => $cssClass);
        if ($mod->firstOfID) {
            $attrs['id'] = "{$kind}-{$this->prefix}-{$mod->id}";
        }
        return $attrs;
    }

    /** Emit an empty img element carrying the image node's attributes. */
    private function writeImage(ImageNode $imgNode) {
        $this->handler->startElement('img', $imgNode->attributes);
        $this->handler->endElement('img');
    }
}
00985
/**
 * Content handler that turns element and character events into HTML strings
 * and forwards each of them to the delegate's addHtml() method.
 */
class DelegatingContentHandler {

    /** Object receiving the generated HTML through addHtml(). */
    private $delegate;

    /**
     * @param mixed $delegate Object exposing addHtml($html).
     */
    function __construct($delegate) {
        $this->delegate = $delegate;
    }

    /**
     * Forward an opening tag built by Xml::openElement().
     *
     * @param string $qname     Element name.
     * @param array  $arguments Element attributes.
     */
    function startElement($qname, $arguments) {
        $openTag = Xml::openElement($qname, $arguments);
        $this->delegate->addHtml($openTag);
    }

    /**
     * Forward a closing tag built by Xml::closeElement().
     *
     * @param string $qname Element name.
     */
    function endElement($qname) {
        $closeTag = Xml::closeElement($qname);
        $this->delegate->addHtml($closeTag);
    }

    /**
     * Forward text after escaping it with htmlspecialchars().
     *
     * @param string $chars Plain text.
     */
    function characters($chars) {
        $escaped = htmlspecialchars($chars);
        $this->delegate->addHtml($escaped);
    }

    /**
     * Forward ready-made HTML unchanged.
     *
     * @param string $html HTML fragment.
     */
    function html($html) {
        $this->delegate->addHtml($html);
    }
}
01009 }