dc=array(" ","\t","\r","\n","<",">","\"","'","=","/"); $this->nc=array("<",">","=","/"); $this->qc=array("\"","'"); $this->sc=array("\r","\n"," ","\t"); $this->prevstate=array("state"=>0,"word"=>""); $this->pg=&$grammar; $this->pos=0; $this->stacktag=array(); $this->stacktagpos=-1; $this->content=array(); $this->content["contentpos"]=-1; $this->c=&$this->content; $this->cp=-1; $this->quotstate=-1; $this->allreadyparsed=0; $this->text=""; $this->processtag=0; $this->processpar=0; $this->processparvalue=0; $this->slevel=array(0); $this->slevelpos=0; $this->quottype=""; $this->skipto=""; $this->incomment=0; $this->tagreg=array(); $this->wasquot=0; if(isset($this->data) && is_array($this->data)) { $this->content=&$data; $this->allreadyparsed=1; return; } clearstatcache(); $this->name=$data; if (!$datatype) { $this->name=$name; $this->data=$data; $this->length=strlen($this->data); return; } if (!$fp=fopen($this->name,"rb")) { $this->SetError(1,"Can't open file $this->name.",0,0,"Error"); return; } flock($fp,1); $this->data=fread($fp,filesize($this->name)); flock($fp,3); fclose($fp); $this->length=strlen($this->data); } /******************************************************************************************** * Get word from data ********************************************************************************************/ function GetWord(&$word) { $word=""; $this->wasquot=0; if ($this->pos>$this->length) return false; while (1) { if ($this->pos>$this->length) return false; if ($this->pos==$this->length) { $this->pos++; return true; } if ($this->data[$this->pos]=="<") { if ($this->data[$this->pos+1]=="!") if ($this->length>6 && $this->length-$this->pos+1>6) { if (substr($this->data,$this->pos,4)=="") { $word.="-->"; $this->pos+=3; break; } else $word.=$this->data[$this->pos++]; } if ($this->incomment) break; } } } if (!$this->processtag) { if ($this->data[$this->pos]=="<") { $this->processtag=1; $this->tagpos=strlen($this->text); } else { $this->text.=$this->data[$this->pos++]; continue; } } if (in_array($this->data[$this->pos],$this->dc)) { if (($this->data[$this->pos]=="<" || $this->data[$this->pos]==">") && $this->quotstate==-1 && $this->processparvalue) { $this->processparvalue=0; return true; } if (in_array($this->data[$this->pos],$this->sc) && $this->quotstate==-1) { $this->text.=$this->data[$this->pos++]; if (strlen($word)) { if ($this->processparvalue) $this->processparvalue=0; return true; } else continue; } if (!strlen($word)) { if (in_array($this->data[$this->pos],$this->qc) && $this->processpar) { if ($this->quotstate==-1) { $this->wasquot=1; $this->quotstate*=-1; $this->quottype=$this->data[$this->pos]; $this->text.=$this->data[$this->pos++]; continue; } elseif ($this->quottype==$this->data[$this->pos]) { $this->quotstate*=-1; $this->quottype=$this->data[$this->pos]; $this->processpar=$this->processparvalue=0; $this->text.=$this->data[$this->pos++]; return true; } } elseif (in_array($this->data[$this->pos],$this->nc)) { $word.=$this->data[$this->pos]; $this->text.=$this->data[$this->pos++]; if ($this->processparvalue) continue; else return true; } } else { if (in_array($this->data[$this->pos],$this->qc) && $this->processpar) { if ($this->quotstate==1) { if ($this->data[$this->pos]==$this->quottype && $this->processparvalue) { $this->quotstate*=-1; $this->quottype=$this->data[$this->pos]; $this->processpar=$this->processparvalue=0; $this->text.=$this->data[$this->pos++]; // continue; } else { if ($this->data[$this->pos]==$this->quottype) { $this->quotstate*=-1; $this->quottype=""; } $word.=$this->data[$this->pos]; $this->text.=$this->data[$this->pos++]; continue; } } return true; } else { if (in_array($this->data[$this->pos],$this->nc)) { if ($this->quotstate==-1) { if ($this->processparvalue) { if($this->data[$this->pos]!="/" && $this->data[$this->pos]!="=") return true; $word.=$this->data[$this->pos]; $this->text.=$this->data[$this->pos++]; continue; } } else { $word.=$this->data[$this->pos]; $this->text.=$this->data[$this->pos++]; continue; } return true; } elseif ($this->quotstate==-1 && $this->processparvalue && strlen($word)) { if ($this->data[$this->pos]==" ") { $this->text.=$this->data[$this->pos++]; $this->processparvalue=0; return true; } } } } } $word.=$this->data[$this->pos]; $this->text.=$this->data[$this->pos++]; } return true; } /******************************************************************************************** * Parse HTML code ******************************************************************************************** | <[/]tagname> in/state 0 1 2 3 4 5 6 7 8 < 1 -1 -1 -1 -1 -1 -1 -1 -1 / -1 7 6 6 6 6 -1 -1 -1 = -1 -1 -1 4 -1 -1 -1 -1 -1 > -1 -1 -2 -2 -2 -2 -2 -1 -3 anyword -1 2 3 3 5 3 -1 8 -1 -3 end parse close tag -2 end parse open tag -1 error 0 begin parse 1 got '<', waiting '/' or any word as tag name 2 got any word as tagname, waiting '/' or '>' or any word as parameter name 3 got any word as parameter name, waiting '/' or '>' or '=' or any word as parameter name 4 got '=' waiting '/' or '>' or any word as parameter value 5 got any word as parameter value, waiting '/' or '>' or any word as parameter name 6 got '/' waiting '>' 7 got '/', waiting any word as close tagname 8 got any word as close tag name, waiting '>' ********************************************************************************************/ function Parse() { $automat=array( // states 0 1 2 3 4 5 6 7 8 "0"=>array( 1, -1, -1, -1, -1, -1, -1, -1, -1),// < "1"=>array(-1, 7, 6, 6, 6, 6, -1, -1, -1),// / "2"=>array(-1, -1, -1, 4, -1, -1, -1, -1, -1),// = "3"=>array(-1, -1, -2, -2, -2, -2, -2, -1, -3),// > "4"=>array(-1, 2, 3, 3, 5, 3, -1, 8, -1) // any word ); if (!strlen($this->data)) return; $instates=array("<"=>0,"/"=>1,"="=>2,">"=>3); $parcount=0; $state=0; $this->c=&$this->content; $this->cp=&$this->content["contentpos"]; $this->stacktag[0]["tag"]=&$this->c; $this->stacktag[0]["level"]=&$this->slevel; $this->stacktag[0]["levelpos"]=0; $this->stacktagpos=0; while(1) { if (!$isword=$this->GetWord($word)) break; $w=strtolower($word); if (!isset($instates[$w])) $instate=4; else $instate=$instates[$w]; //print htmlspecialchars($word).",$state,$instate,$this->quottype
"; $state=$automat[$instate][$state]; if ($this->wasquot && $state==6) $state=5; //print htmlspecialchars($word).",$state
"; switch($state) { case -3:// end parse close tag if (strlen($this->skipto) && $this->tagname!=$this->skipto) { $parcount=$state=$this->processpar=$this->processparvalue=$this->processtag=0; $this->pars=array(); break; } else $this->skipto=""; $script=($this->tagname=="script") ? 1:0; $this->AddNewText(substr($this->text,0,$this->tagpos),$script); $this->AddNewTag(0); $parcount=$state=$this->processpar=$this->processparvalue=$this->processtag=0; $this->quottype=""; $this->quotstate=-1; $this->text=""; $this->pars=array(); $this->tagpos=0; break; case -2:// end parse open tag if (strlen($this->skipto)) { $parcount=$state=$this->processpar=$this->processparvalue=$this->processtag=0; $this->pars=array(); break; } $this->AddNewText(substr($this->text,0,$this->tagpos)); $this->AddNewTag(1,$xmlclose); $parcount=$state=$this->processpar=$this->processparvalue=$this->processtag=0; $this->quottype=""; $this->quotstate=-1; $this->text=""; $this->pars=array(); $this->tagpos=0; if (isset($this->pg[$this->tagname]["nohavetags"]) && !strlen($this->skipto)) $this->skipto=$this->tagname; break; case -1:// Error found $parcount=$state=$this->processpar=$this->processparvalue=$this->processtag=0; $this->pars=array(); if ($this->incomment) { if (strlen($this->text)) { $this->AddNewText($this->text); $this->text=""; $this->tagpos=0; } $this->AddNewText($word,0,1); $this->incomment=0; break; } if ($word=="<") { $state=1; $this->processtag=1; $this->processparvalue=0; $this->tagpos=strlen($this->text)-1; $this->quottype=""; $this->quotstate=-1; } break; case 2:// got any word as tagname, waiting '/' or '>' or any word as parameter name $this->tagname=$w; $xmlclose=0; if (!ereg("^[a-zA-Z0-9!_-]+$",$this->tagname) || strlen($this->skipto)) { $parcount=$state=$this->processpar=$this->processparvalue=$this->processtag=0; $this->quottype=""; $this->quotstate=-1; $this->pars=array(); break; } break; case 3:// got any word as parameter name, waiting '/' or '>' or '=' or any word as parameter name $this->parname=$w; if (!ereg("^[a-zA-Z0-9!_-]+$",$this->parname) || strlen($this->skipto)) { $parcount=$state=$this->processpar=$this->processparvalue=$this->processtag=0; $this->quottype=""; $this->quotstate=-1; $this->pars=array(); break; } $this->processpar=1; if ($w!="/") { $parcount++; $this->pars[$this->parname]["single"]=1; } else $xmlclose=1; break; case 4:// got '=' waiting '/' or '>' or any word as parameter value $this->processparvalue=1; break; case 5:// got any word as parameter value, waiting '/' or '>' or any word as parameter name if ($this->parname!="/") { unset($this->pars[$this->parname]["single"]); $this->pars[$this->parname]["value"]=$word; $this->pars[$this->parname]["quot"]=$this->quottype; } $this->quottype=""; $this->processpar=$this->processparvalue=0; break; case 6:// got '/' waiting '>' $xmlclose=1; break; case 8:// got any word as close tag name, waiting '>' $this->tagname=$w; break; } $this->prevstate["states"]=$state; $this->prevstate["word"]=$word; } if (strlen($this->text)) $this->AddNewText($this->text); } /******************************************************************************************** * Add new tag ********************************************************************************************/ function AddNewTag($open,$xmlclose=0) { $actionclose=0; if (!$open && in_array( $this->tagname, $this->pg ) && $this->pg[$this->tagname]["endtag"]!="absent") $actionclose=1; if ($open) for ($i=$this->stacktagpos;$i>0;$i--) { $ct=&$this->stacktag[$i]["tag"]; $t=&$ct[$ct["contentpos"]]; $tagname=$t["data"]["name"]; if (isset($this->pg[$tagname]["closeon"])) { if (isset($this->pg[$tagname]["closeon"]["in"]) && sizeof($this->pg[$tagname]["closeon"]["in"]) && in_array($this->tagname,$this->pg[$tagname]["closeon"]["in"]) || isset($this->pg[$tagname]["closeon"]["notin"]) && sizeof($this->pg[$tagname]["closeon"]["notin"]) && !in_array($this->tagname,$this->pg[$tagname]["closeon"]["notin"])) { $actionclose=2; break; } } if ($actionclose!=2) $i=-1; } if ($actionclose) { if ($actionclose==1) { $i=$this->FindTag($this->tagname); if ($i>-1) if ($this->tagreg[$this->tagname]!=$this->stacktag[$i]["num"]) $i=-1; } if ($i>-1) { $this->c=&$this->stacktag[$i]["tag"]; $this->cp=&$this->c["contentpos"]; $this->stacktagpos=$i; if ($actionclose==1) { $c=&$this->c[$this->c["contentpos"]]["content"]; $cp=&$this->c[$this->c["contentpos"]]["content"]["contentpos"]; $cp++; $c[$cp]["type"]="tag"; $c[$cp]["data"]["name"]=$this->tagname; $c[$cp]["data"]["type"]="close"; if (isset($this->tagreg[$this->tagname])) if ($this->tagreg[$this->tagname]) $this->tagreg[$this->tagname]--; $this->stacktag[$this->stacktagpos]["num"]=$this->tagreg[$this->tagname]; $this->stacktagpos--; } if ($this->stacktagposstacktag)) for ($i=$this->stacktagpos+1;$istacktag);$i++) unset($this->stacktag[$i]); if ($actionclose==1) return; } } $this->cp++; $this->c[$this->cp]["type"]="tag"; $this->c[$this->cp]["data"]["name"]=$this->tagname; $this->c[$this->cp]["data"]["type"]=($open) ? "open" : "close"; if (!$open) if (isset($this->tagreg[$this->tagname])) if ($this->tagreg[$this->tagname]) $this->tagreg[$this->tagname]--; if ($xmlclose) $this->c[$this->cp]["xmlclose"]=1; if (sizeof($this->pars)) $this->c[$this->cp]["pars"]=$this->pars; if ($open && !$xmlclose && in_array( $this->tagname, $this->pg ) && $this->pg[$this->tagname]["endtag"]!="absent") { if (!isset($this->tagreg[$this->tagname])) $this->tagreg[$this->tagname]=0; $this->tagreg[$this->tagname]++; $this->stacktagpos++; $this->stacktag[$this->stacktagpos]["tag"]=&$this->c; $this->stacktag[$this->stacktagpos]["num"]=$this->tagreg[$this->tagname]; $this->c[$this->cp]["content"]=array(); $this->c[$this->cp]["content"]["contentpos"]=-1; $this->c=&$this->c[$this->cp]["content"]; $this->cp=&$this->c["contentpos"]; } } /******************************************************************************************** * Add new text ********************************************************************************************/ function AddNewText($text,$script=0,$comment=0) { if (!strlen($text)) return; $this->cp++; if (!$comment) $this->c[$this->cp]["type"]="text"; else $this->c[$this->cp]["type"]="comment"; if ($script) { $inputarray=array("/_top/","/top.location.href/","/([ \n]+)?window\.name/","/parent.location/"); $replarray=array("_echoserver_file_space","parent.frames('_echoserver_file_space').src","//window.name","parent.frames('_echoserver_file_space').src"); /* $text=str_replace("_top","_echoserver_file_space",$text); $text=str_replace("top.location.href","parent.frames('_echoserver_file_space').src",$text); $text=preg_replace("/([ \n]+)?window\.name/","//window.name",$text); */ $text=preg_replace($inputarray,$replarray,$text); } $this->c[$this->cp]["data"]=$text; $this->text=""; } /******************************************************************************************** * Find first tag in stack ********************************************************************************************/ function FindTag($tagname) { for($i=$this->stacktagpos;$i>=0;$i--) if ($this->stacktag[$i]["tag"][$this->stacktag[$i]["tag"]["contentpos"]]["data"]["name"]==$tagname) return $i; return -1; } } } //_ECHOSERVER_HTML_PARSER ?>