Friday, September 3, 2010

PHP XML Pull Parser with Array Iterator

PHP XML Pull Parser with Array Iterator


Parsing large XML files using PHP's XMLReader


  • Files

  • demo.xmlArray.class.php

  • svn-log.xml

  • xmlArray.class.php



demo.xmlArray.class.php



<?php
// Demo: stream every <logentry> element of svn-log.xml through XMLArray and
// dump each one as a nested array. Only one entry is held in memory at a time.

// The class is mandatory for everything below, so fail hard if it is missing.
require_once( 'xmlArray.class.php' );

$oxml = new XMLArray( 'svn-log.xml', 'logentry' );

foreach( $oxml as $logentry )
{
    print_r( $logentry );
}
?>


$logentry = Array(
["logentry"] => Array(
["node"] => "logentry"
["atr"] => Array( ["revision"] => 43372 )
["value"] => Array(
["author"] => Array(
["0"] => Array(
["node"] => "author"
["atr"] =>
["value"] => Array( ["0"] => "user.name")
)
)
["date"] => Array(
["0"] => Array(
["node"] => "date"
["atr"] =>
["value"] => Array( ["0"] => "2009-11-25T06:02:34.092477Z" )
)
)
["msg"] => Array(
["0"] => Array(
["node"] => "msg"
["atr"] =>
["value"] => Array()
)
)
)
)
)

svn-log.xml



<?xml version="1.0"?>
<log>
<logentry revision="58245">
<author>User.Name</author>
<date>2010-09-02T14:08:31.878082Z</date>
<paths>
<path kind="" action="A">/code/google/path/svnlog-xml.php</path>
<path kind="" action="A">/code/google/path/svnlog.sh</path>
<path kind="" action="A">/code/google/path/svndiff.sh</path>
<path kind="" action="A">/code/google/path/xmlArray.class.php</path>
</paths>
<msg>added first time </msg>
</logentry>
<logentry revision="58244">
<author>User.Name</author>
<date>2010-09-02T14:07:16.389655Z</date>
<paths>
<path kind="" action="A">/code/google/path/svn-script</path>
</paths>
<msg>added first time </msg>
</logentry>
<logentry revision="58243">
<author>User.Name</author>
<date>2010-09-02T14:05:58.198520Z</date>
<paths>
<path kind="" action="M">/code/google/path/facebook/index.php</path>
</paths>
<msg>dd</msg>
</logentry>
<logentry revision="50577">
<author>User.Name</author>
<date>2010-03-26T07:24:53.579939Z</date>
<paths>
<path kind="" action="M">/code/google/path/facebook/comment.php</path>
</paths>
<msg>Add code to get users wall </msg>
</logentry>
</log>


xmlArray.class.php



<?php
//
//test.xml
//-----------
//<locations><location><a q="qq">1</a><b></b></location><location><c>3</c><d>4</d></location><location><e>5</e><f a="aa" b="ab">6</f><g>7</g><g>77</g><hi><h>8</h><i>9A</i><i>9B</i><i>9C</i></hi></location></locations>
//
//$o = new XMLArray( 'test.xml', 'location');
//$a=array();
//foreach( $o as $k => $v){
// $a[$k]=$v;
//}


/**
 * Pull parser that exposes every occurrence of one element of an XML file as
 * a nested PHP array, one occurrence per iteration step.
 *
 * The file is streamed with XMLReader, so only the element currently being
 * converted is held in memory.
 *
 * Shape of one iteration value (for a searched tag "logentry"):
 *
 *   array( 'logentry' => array(
 *       'node'  => 'logentry',
 *       'atr'   => array( name => value, ... ) or false when there are none,
 *       'value' => array( childName => array( 0 => array(
 *                      'node'  => childName,
 *                      'atr'   => ...,
 *                      'value' => array( text ) or nested children
 *                  ), ... ), ... )
 *                  ('' when the searched element has no child elements)
 *   ) )
 */
class XMLArray implements Iterator
{
    /** Path handed to XMLReader::open(). */
    protected $xmlFilePath;

    /** The underlying XMLReader instance. */
    protected $oXR = null;

    /** Name of the element that forms one iteration item. */
    protected $tag2Search;

    /** Item the iterator currently points at (false/empty once exhausted). */
    protected $arrCurent;

    /** True while the reader is inside an occurrence of the searched element. */
    protected $firstFound = false;

    /** Zero-based iteration key. */
    protected $position = 0;

    /** True once read() has been called, i.e. $arrCurent has been primed. */
    protected $readCalled = false;

    /** Number of leading occurrences to skip; kept so rewind() can re-apply it. */
    protected $skipCount = 0;

    /** Set to true (e.g. in a subclass) to make debug() write its log file. */
    protected $debugEnabled = false;

    /**
     * @param string $xmlFilePath Path or URI of the XML file to stream.
     * @param string $tag2Search  Element name that forms one iteration item.
     * @param int    $skipCount   Number of leading occurrences to skip.
     */
    public function __construct( $xmlFilePath, $tag2Search , $skipCount=0)
    {
        $this->xmlFilePath = $xmlFilePath;
        $this->tag2Search  = $tag2Search;
        $this->skipCount   = $skipCount;

        $this->arrCurent = array();
        $this->oXR       = new XMLReader();

        $this->openAndSkip();
    }

    /**
     * (Re)opens the file, resets the parse state and applies the skip count.
     * Shared by the constructor and rewind().
     */
    protected function openAndSkip()
    {
        $this->oXR->open( $this->xmlFilePath );

        $this->firstFound = false;
        $this->readCalled = false;
        $this->arrCurent  = array();

        if( 0 < $this->skipCount )
        {
            // One read more than the skip count: the last result is the first
            // item that is NOT skipped and becomes the current item.
            for( $iloop = 0; $iloop <= $this->skipCount; $iloop++ )
            {
                $this->arrCurent = $this->read( $this->tag2Search );
            }
        }
    }

    /**
     * Reads forward and converts the next element to an array.
     *
     * Called without arguments it returns the next occurrence of the searched
     * tag. It calls itself with a child's name and depth to collect that
     * child's content.
     *
     * @param string $tag2Search Element whose end tag terminates this call;
     *                           '' means the tag given to the constructor.
     * @param int    $depth      Reader depth of that element.
     *
     * @return array|false The converted element, array( text ) when the first
     *                     node met is text/CDATA, or false at end of file.
     */
    public function read( $tag2Search='' , $depth=0 )
    {
        $this->readCalled = true;

        if( '' == $tag2Search )
        {
            $tag2Search = $this->tag2Search;
        }

        $ret = array();

        while( $this->oXR->read() )
        {
            switch( $this->oXR->nodeType )
            {
                case XMLReader::ELEMENT:

                    $currNode = $this->oXR->name;

                    // A self-closing element (<x/>) never yields END_ELEMENT,
                    // so remember it before getAttr() moves the cursor around.
                    $isEmpty = $this->oXR->isEmptyElement;

                    if( false == $this->firstFound && $tag2Search == $currNode )
                    {
                        $this->firstFound = true;
                        $depth = $this->oXR->depth;
                    }

                    if( false == $this->firstFound )
                    {
                        // Still outside the searched element: ignore the node.
                        // ("continue" here would act as "break" and has raised
                        // a warning since PHP 7.3.)
                        break;
                    }

                    $atr = $this->getAttr();

                    if( $depth < $this->oXR->depth )
                    {
                        // Child element: collect its content recursively. An
                        // empty child gets the same value as <x></x> does.
                        $childVal = $isEmpty
                            ? array()
                            : $this->read( $currNode, $this->oXR->depth );

                        $child = array( 'node'=>$currNode, 'atr'=>$atr, 'value'=> $childVal );

                        if( isset( $ret[$tag2Search]['value'] ) )
                        {
                            // 'value' starts out as ''. PHP 7.1 stopped turning
                            // an empty string into an array on offset
                            // assignment, so convert it explicitly first.
                            if( !is_array( $ret[$tag2Search]['value'] ) )
                            {
                                $ret[$tag2Search]['value'] = array();
                            }
                            $ret[$tag2Search]['value'][$currNode][] = $child;
                        }
                        else
                        {
                            $ret[$currNode][] = $child;
                        }
                    }
                    else
                    {
                        $ret[$tag2Search] = array( 'node'=>$currNode, 'atr'=>$atr, 'value'=> '' );

                        if( $isEmpty && $tag2Search == $currNode )
                        {
                            // No END_ELEMENT will follow; finish exactly as the
                            // END_ELEMENT branch below would.
                            if( $tag2Search == $this->tag2Search )
                            {
                                $this->firstFound = false;
                            }
                            return $ret;
                        }
                    }
                    break;

                case XMLReader::TEXT:
                case XMLReader::CDATA:
                case XMLReader::WHITESPACE:

                    if( $this->firstFound )
                    {
                        return array( $this->oXR->value );
                    }
                    break;

                case XMLReader::END_ELEMENT:

                    if( $tag2Search == $this->oXR->name )
                    {
                        if( $tag2Search == $this->tag2Search )
                        {
                            // Left the searched element: look for the next one.
                            $this->firstFound = false;
                        }
                        return $ret;
                    }
                    break;

                default:
                    break;
            }
        }

        return false;
    }

    /**
     * Returns the item the iterator points at without advancing, so repeated
     * calls return the same item.
     *
     * @return array|false
     */
    #[\ReturnTypeWillChange]
    public function current()
    {
        if( false == $this->readCalled )
        {
            $this->arrCurent = $this->read();
        }

        return $this->arrCurent;
    }

    /**
     * @return int Zero-based index of the current item.
     */
    #[\ReturnTypeWillChange]
    public function key()
    {
        return $this->position;
    }

    /**
     * Advances to the next occurrence of the searched element.
     */
    #[\ReturnTypeWillChange]
    public function next()
    {
        if( false == $this->readCalled )
        {
            // Never primed: load the first item so that it is the one skipped.
            $this->arrCurent = $this->read();
        }

        ++$this->position;

        // Do not read past the end once the iterator is exhausted.
        if( !empty( $this->arrCurent ) )
        {
            $this->arrCurent = $this->read();
        }
    }

    /**
     * Moves back to the first item (after the skipped ones).
     *
     * XMLReader is forward-only, so once the iteration has advanced the file
     * is closed and opened again. An iterator that has not advanced yet is
     * left untouched.
     */
    #[\ReturnTypeWillChange]
    public function rewind()
    {
        if( 0 < $this->position )
        {
            $this->oXR->close();
            $this->openAndSkip();
        }

        $this->position = 0;
    }

    /**
     * @return bool True while there is a current item.
     */
    #[\ReturnTypeWillChange]
    public function valid()
    {
        if( false == $this->readCalled )
        {
            $this->arrCurent = $this->read();
        }

        return !empty( $this->arrCurent );
    }

    /**
     * Collects the attributes of the element the reader is positioned on and
     * moves the cursor back to that element.
     *
     * @return array|false Map of attribute name to value, or false when the
     *                     element has no attributes.
     */
    public function getAttr()
    {
        $atr = false;

        try
        {
            if( $this->oXR->hasAttributes )
            {
                $atr = array();

                while( $this->oXR->moveToNextAttribute() )
                {
                    $atr[$this->oXR->name] = $this->oXR->value;
                }

                $this->oXR->moveToElement();
            }
        }
        catch( Exception $e )
        {
            $this->debug( 'Error' . var_export( $e, true ) );
        }

        return $atr;
    }

    /**
     * Writes a line to the debug log; does nothing unless $debugEnabled is true.
     *
     * @param string $s         Text to log.
     * @param bool   $overWrite True to start a fresh log instead of appending.
     */
    protected function debug($s , $overWrite=false)
    {
        if( !$this->debugEnabled )
        {
            return;
        }

        $logFile = 'log'.basename(__FILE__).'.log';

        if( $overWrite )
        {
            file_put_contents( $logFile, "\n". $s );
        }
        else
        {
            file_put_contents( $logFile, "\n". $s , FILE_APPEND );
        }
    }
}
?>




Useful links:
http://php.net/manual/en/book.xmlreader.php
http://www.ibm.com/developerworks/library/x-pullparsingphp.html

No comments:

Post a Comment