[ Index ] |
PHP Cross Reference of Unnamed Project |
[Summary view] [Print] [Text view]
1 # $Id: XMLParser.pm,v 1.49 2001/03/14 17:13:57 matt Exp $ 2 3 package XML::XPath::XMLParser; 4 5 use strict; 6 7 use XML::Parser; 8 #use XML::XPath; 9 use XML::XPath::Node; 10 use XML::XPath::Node::Element; 11 use XML::XPath::Node::Text; 12 use XML::XPath::Node::Comment; 13 use XML::XPath::Node::PI; 14 use XML::XPath::Node::Attribute; 15 use XML::XPath::Node::Namespace; 16 17 my @options = qw( 18 filename 19 xml 20 parser 21 ioref 22 ); 23 24 my ($_current, $_namespaces_on); 25 my %IdNames; 26 27 use vars qw/$xmlns_ns $xml_ns/; 28 29 $xmlns_ns = "http://www.w3.org/2000/xmlns/"; 30 $xml_ns = "http://www.w3.org/XML/1998/namespace"; 31 32 sub new { 33 my $proto = shift; 34 my $class = ref($proto) || $proto; 35 my %args = @_; 36 my %hash = map(( "_$_" => $args{$_} ), @options); 37 bless \%hash, $class; 38 } 39 40 sub parse { 41 my $self = shift; 42 43 $self->{IdNames} = {}; 44 $self->{InScopeNamespaceStack} = [ { 45 '_Default' => undef, 46 'xmlns' => $xmlns_ns, 47 'xml' => $xml_ns, 48 } ]; 49 50 $self->{NodeStack} = [ ]; 51 52 $self->set_xml($_[0]) if $_[0]; 53 54 my $parser = $self->get_parser || XML::Parser->new( 55 ErrorContext => 2, 56 ParseParamEnt => 1, 57 ); 58 59 $parser->setHandlers( 60 Init => sub { $self->parse_init(@_) }, 61 Char => sub { $self->parse_char(@_) }, 62 Start => sub { $self->parse_start(@_) }, 63 End => sub { $self->parse_end(@_) }, 64 Final => sub { $self->parse_final(@_) }, 65 Proc => sub { $self->parse_pi(@_) }, 66 Comment => sub { $self->parse_comment(@_) }, 67 Attlist => sub { $self->parse_attlist(@_) }, 68 ); 69 70 my $toparse; 71 if ($toparse = $self->get_filename) { 72 return $parser->parsefile($toparse); 73 } 74 else { 75 return $parser->parse($self->get_xml || $self->get_ioref); 76 } 77 } 78 79 sub parsefile { 80 my $self = shift; 81 my ($filename) = @_; 82 $self->set_filename($filename); 83 $self->parse; 84 } 85 86 sub parse_init { 87 my $self = shift; 88 my $e = shift; 89 my $document = XML::XPath::Node::Element->new(); 90 my $newns = XML::XPath::Node::Namespace->new('xml', $xml_ns); 91 $document->appendNamespace($newns); 92 $self->{current} = $self->{DOC_Node} = $document; 93 } 94 95 sub parse_final { 96 my $self = shift; 97 return $self->{DOC_Node}; 98 } 99 100 sub parse_char { 101 my $self = shift; 102 my $e = shift; 103 my $text = shift; 104 105 my $parent = $self->{current}; 106 107 my $last = $parent->getLastChild; 108 if ($last && $last->isTextNode) { 109 # append to previous text node 110 $last->appendText($text); 111 return; 112 } 113 114 my $node = XML::XPath::Node::Text->new($text); 115 $parent->appendChild($node, 1); 116 } 117 118 sub parse_start { 119 my $self = shift; 120 my $e = shift; 121 my $tag = shift; 122 123 push @{ $self->{InScopeNamespaceStack} }, 124 { %{ $self->{InScopeNamespaceStack}[-1] } }; 125 $self->_scan_namespaces(@_); 126 127 my ($prefix, $namespace) = $self->_namespace($tag); 128 129 my $node = XML::XPath::Node::Element->new($tag, $prefix); 130 131 my @attributes; 132 for (my $ii = 0; $ii < $#_; $ii += 2) { 133 my ($name, $value) = ($_[$ii], $_[$ii+1]); 134 if ($name =~ /^xmlns(:(.*))?$/) { 135 # namespace node 136 my $prefix = $2 || '#default'; 137 # warn "Creating NS node: $prefix = $value\n"; 138 my $newns = XML::XPath::Node::Namespace->new($prefix, $value); 139 $node->appendNamespace($newns); 140 } 141 else { 142 my ($prefix, $namespace) = $self->_namespace($name); 143 undef $namespace unless $prefix; 144 145 my $newattr = XML::XPath::Node::Attribute->new($name, $value, $prefix); 146 $node->appendAttribute($newattr, 1); 147 if (exists($self->{IdNames}{$tag}) && ($self->{IdNames}{$tag} eq $name)) { 148 # warn "appending Id Element: $val for ", $node->getName, "\n"; 149 $self->{DOC_Node}->appendIdElement($value, $node); 150 } 151 } 152 } 153 154 $self->{current}->appendChild($node, 1); 155 $self->{current} = $node; 156 } 157 158 sub parse_end { 159 my $self = shift; 160 my $e = shift; 161 $self->{current} = $self->{current}->getParentNode; 162 } 163 164 sub parse_pi { 165 my $self = shift; 166 my $e = shift; 167 my ($target, $data) = @_; 168 my $node = XML::XPath::Node::PI->new($target, $data); 169 $self->{current}->appendChild($node, 1); 170 } 171 172 sub parse_comment { 173 my $self = shift; 174 my $e = shift; 175 my ($data) = @_; 176 my $node = XML::XPath::Node::Comment->new($data); 177 $self->{current}->appendChild($node, 1); 178 } 179 180 sub parse_attlist { 181 my $self = shift; 182 my $e = shift; 183 my ($elname, $attname, $type, $default, $fixed) = @_; 184 if ($type eq 'ID') { 185 $self->{IdNames}{$elname} = $attname; 186 } 187 } 188 189 sub _scan_namespaces { 190 my ($self, %attributes) = @_; 191 192 while (my ($attr_name, $value) = each %attributes) { 193 if ($attr_name eq 'xmlns') { 194 $self->{InScopeNamespaceStack}[-1]{'_Default'} = $value; 195 } elsif ($attr_name =~ /^xmlns:(.*)$/) { 196 my $prefix = $1; 197 $self->{InScopeNamespaceStack}[-1]{$prefix} = $value; 198 } 199 } 200 } 201 202 sub _namespace { 203 my ($self, $name) = @_; 204 205 my ($prefix, $localname) = split(/:/, $name); 206 if (!defined($localname)) { 207 if ($prefix eq 'xmlns') { 208 return '', undef; 209 } else { 210 return '', $self->{InScopeNamespaceStack}[-1]{'_Default'}; 211 } 212 } else { 213 return $prefix, $self->{InScopeNamespaceStack}[-1]{$prefix}; 214 } 215 } 216 217 sub as_string { 218 my $node = shift; 219 $node->toString; 220 } 221 222 sub get_parser { shift->{_parser}; } 223 sub get_filename { shift->{_filename}; } 224 sub get_xml { shift->{_xml}; } 225 sub get_ioref { shift->{_ioref}; } 226 227 sub set_parser { $_[0]->{_parser} = $_[1]; } 228 sub set_filename { $_[0]->{_filename} = $_[1]; } 229 sub set_xml { $_[0]->{_xml} = $_[1]; } 230 sub set_ioref { $_[0]->{_ioref} = $_[1]; } 231 232 1; 233 234 __END__ 235 236 =head1 NAME 237 238 XML::XPath::XMLParser - The default XML parsing class that produces a node tree 239 240 =head1 SYNOPSIS 241 242 my $parser = XML::XPath::XMLParser->new( 243 filename => $self->get_filename, 244 xml => $self->get_xml, 245 ioref => $self->get_ioref, 246 parser => $self->get_parser, 247 ); 248 my $root_node = $parser->parse; 249 250 =head1 DESCRIPTION 251 252 This module generates a node tree for use as the context node for XPath processing. 253 It aims to be a quick parser, nothing fancy, and yet has to store more information 254 than most parsers. To achieve this I've used array refs everywhere - no hashes. 255 I don't have any performance figures for the speedups achieved, so I make no 256 appologies for anyone not used to using arrays instead of hashes. I think they 257 make good sense here where we know the attributes of each type of node. 258 259 =head1 Node Structure 260 261 All nodes have the same first 2 entries in the array: node_parent 262 and node_pos. The type of the node is determined using the ref() function. 263 The node_parent always contains an entry for the parent of the current 264 node - except for the root node which has undef in there. And node_pos is the 265 position of this node in the array that it is in (think: 266 $node == $node->[node_parent]->[node_children]->[$node->[node_pos]] ) 267 268 Nodes are structured as follows: 269 270 =head2 Root Node 271 272 The root node is just an element node with no parent. 273 274 [ 275 undef, # node_parent - check for undef to identify root node 276 undef, # node_pos 277 undef, # node_prefix 278 [ ... ], # node_children (see below) 279 ] 280 281 =head2 Element Node 282 283 [ 284 $parent, # node_parent 285 <position in current array>, # node_pos 286 'xxx', # node_prefix - namespace prefix on this element 287 [ ... ], # node_children 288 'yyy', # node_name - element tag name 289 [ ... ], # node_attribs - attributes on this element 290 [ ... ], # node_namespaces - namespaces currently in scope 291 ] 292 293 =head2 Attribute Node 294 295 [ 296 $parent, # node_parent - the element node 297 <position in current array>, # node_pos 298 'xxx', # node_prefix - namespace prefix on this element 299 'href', # node_key - attribute name 300 'ftp://ftp.com/', # node_value - value in the node 301 ] 302 303 =head2 Namespace Nodes 304 305 Each element has an associated set of namespace nodes that are currently 306 in scope. Each namespace node stores a prefix and the expanded name (retrieved 307 from the xmlns:prefix="..." attribute). 308 309 [ 310 $parent, 311 <pos>, 312 'a', # node_prefix - the namespace as it was written as a prefix 313 'http://my.namespace.com', # node_expanded - the expanded name. 314 ] 315 316 =head2 Text Nodes 317 318 [ 319 $parent, 320 <pos>, 321 'This is some text' # node_text - the text in the node 322 ] 323 324 =head2 Comment Nodes 325 326 [ 327 $parent, 328 <pos>, 329 'This is a comment' # node_comment 330 ] 331 332 =head2 Processing Instruction Nodes 333 334 [ 335 $parent, 336 <pos>, 337 'target', # node_target 338 'data', # node_data 339 ] 340 341 =head1 Usage 342 343 If you feel the need to use this module outside of XML::XPath (for example 344 you might use this module directly so that you can cache parsed trees), you 345 can follow the following API: 346 347 =head2 new 348 349 The new method takes either no parameters, or any of the following parameters: 350 351 filename 352 xml 353 parser 354 ioref 355 356 This uses the familiar hash syntax, so an example might be: 357 358 use XML::XPath::XMLParser; 359 360 my $parser = XML::XPath::XMLParser->new(filename => 'example.xml'); 361 362 The parameters represent a filename, a string containing XML, an XML::Parser 363 instance and an open filehandle ref respectively. You can also set or get all 364 of these properties using the get_ and set_ functions that have the same 365 name as the property: e.g. get_filename, set_ioref, etc. 366 367 =head2 parse 368 369 The parse method generally takes no parameters, however you are free to 370 pass either an open filehandle reference or an XML string if you so require. 371 The return value is a tree that XML::XPath can use. The parse method will 372 die if there is an error in your XML, so be sure to use perl's exception 373 handling mechanism (eval{};) if you want to avoid this. 374 375 =head2 parsefile 376 377 The parsefile method is identical to parse() except it expects a single 378 parameter that is a string naming a file to open and parse. Again it 379 returns a tree and also dies if there are XML errors. 380 381 =head1 NOTICES 382 383 This file is distributed as part of the XML::XPath module, and is copyright 384 2000 Fastnet Software Ltd. Please see the documentation for the module as a 385 whole for licencing information.
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Tue Mar 17 22:47:18 2015 | Cross-referenced by PHPXref 0.7.1 |