[ Index ] |
PHP Cross Reference of Unnamed Project |
[Summary view] [Print] [Text view]
package XML::Parser::Expat;

# Three-argument open() is used below for safe file opening; it needs 5.6.
require 5.006;

use strict;
use vars qw($VERSION @ISA %Handler_Setters %Encoding_Table @Encoding_Path
            $have_File_Spec);
use Carp;

require DynaLoader;

@ISA = qw(DynaLoader);
$VERSION = "2.36" ;

# True when File::Spec is (or can be) loaded; selects portable path building.
$have_File_Spec = $INC{'File/Spec.pm'} || do 'File/Spec.pm';

# Build the encoding-map search path: every existing XML/Parser/Encodings
# directory under @INC, followed by the current directory.
%Encoding_Table = ();
if ($have_File_Spec) {
  @Encoding_Path = (grep(-d $_,
                         map(File::Spec->catdir($_, qw(XML Parser Encodings)),
                             @INC)),
                    File::Spec->curdir);
}
else {
  @Encoding_Path = (grep(-d $_, map($_ . '/XML/Parser/Encodings', @INC)), '.');
}


bootstrap XML::Parser::Expat $VERSION;

# Maps the handler type names accepted by setHandlers to the setter
# functions that install them on the underlying parser.
%Handler_Setters = (
                    Start        => \&SetStartElementHandler,
                    End          => \&SetEndElementHandler,
                    Char         => \&SetCharacterDataHandler,
                    Proc         => \&SetProcessingInstructionHandler,
                    Comment      => \&SetCommentHandler,
                    CdataStart   => \&SetStartCdataHandler,
                    CdataEnd     => \&SetEndCdataHandler,
                    Default      => \&SetDefaultHandler,
                    Unparsed     => \&SetUnparsedEntityDeclHandler,
                    Notation     => \&SetNotationDeclHandler,
                    ExternEnt    => \&SetExternalEntityRefHandler,
                    ExternEntFin => \&SetExtEntFinishHandler,
                    Entity       => \&SetEntityDeclHandler,
                    Element      => \&SetElementDeclHandler,
                    Attlist      => \&SetAttListDeclHandler,
                    Doctype      => \&SetDoctypeHandler,
                    DoctypeFin   => \&SetEndDoctypeHandler,
                    XMLDecl      => \&SetXMLDeclHandler
                   );

# Constructor.  Options are passed as key/value pairs and become the
# object's hash.  _State_ is 0 before parsing, 1 while a parse is in
# progress and 2 once it has finished.
sub new {
  my ($class, %args) = @_;
  my $self = bless \%args, $class;
  $args{_State_} = 0;
  $args{Context} = [];
  $args{Namespaces} ||= 0;
  $args{ErrorMessage} ||= '';
  if ($args{Namespaces}) {
    $args{Namespace_Table} = {};
    $args{Namespace_List} = [undef];
    $args{Prefix_Table} = {};
    $args{New_Prefixes} = [];
  }
  $args{_Setters} = \%Handler_Setters;
  $args{Parser} = ParserCreate($self, $args{ProtocolEncoding},
                               $args{Namespaces});
  $self;
}

# Function (not a method): load an encoding map.  The basename is
# lowercased, '.enc' is appended if missing, and non-absolute names are
# looked up along @Encoding_Path.  Returns the name reported by
# LoadEncoding; croaks if the file cannot be opened, read or recognised.
sub load_encoding {
  my ($file) = @_;

  $file =~ s!([^/]+)$!\L$1\E!;
  $file .= '.enc' unless $file =~ /\.enc$/;
  unless ($file =~ m!^/!) {
    foreach my $dir (@Encoding_Path) {
      my $tmp = ($have_File_Spec
                 ? File::Spec->catfile($dir, $file)
                 : "$dir/$file");
      if (-e $tmp) {
        $file = $tmp;
        last;
      }
    }
  }

  local(*ENC);
  # Explicit read mode: the name must never be interpreted as a mode or pipe.
  open(ENC, '<', $file) or croak("Couldn't open encmap $file:\n$!\n");
  binmode(ENC);
  my $data;
  my $br = sysread(ENC, $data, -s $file);
  croak("Trouble reading $file:\n$!\n")
    unless defined($br);
  close(ENC);

  my $name = LoadEncoding($data, $br);
  croak("$file isn't an encmap file")
    unless defined($name);

  $name;
}  # End load_encoding

# Register handlers given as (TYPE, HANDLER) pairs.  A false HANDLER
# unsets that handler.  Returns a list of (TYPE, previous handler) pairs.
# Croaks on an odd argument count, a non-code handler or an unknown type.
sub setHandlers {
  my ($self, @handler_pairs) = @_;

  croak("Uneven number of arguments to setHandlers method")
    if (int(@handler_pairs) & 1);

  my @ret;

  while (@handler_pairs) {
    my $type = shift @handler_pairs;
    my $handler = shift @handler_pairs;
    croak "Handler for $type not a Code ref"
      unless (! defined($handler) or ! $handler or ref($handler) eq 'CODE');

    my $hndl = $self->{_Setters}->{$type};

    unless (defined($hndl)) {
      my @types = sort keys %{$self->{_Setters}};
      croak("Unknown Expat handler type: $type\n Valid types: @types");
    }

    my $old = $hndl->($self->{Parser}, $handler);
    push (@ret, $type, $old);
  }

  return @ret;
}

# croak with MESSAGE extended by the current line number and, when the
# ErrorContext option is defined, the surrounding document text.
sub xpcroak
 {
  my ($self, $message) = @_;

  my $eclines = $self->{ErrorContext};
  my $line = GetCurrentLineNumber($self->{Parser});
  $message .= " at line $line";
  $message .= ":\n" . $self->position_in_context($eclines)
    if defined($eclines);
  croak $message;
}

# Same as xpcroak, but carps (warns) instead of dying.
sub xpcarp {
  my ($self, $message) = @_;

  my $eclines = $self->{ErrorContext};
  my $line = GetCurrentLineNumber($self->{Parser});
  $message .= " at line $line";
  $message .= ":\n" . $self->position_in_context($eclines)
    if defined($eclines);
  carp $message;
}

# The accessors below only consult the parser while a parse is in
# progress (_State_ == 1); otherwise they return nothing.

sub default_current {
  my $self = shift;
  if ($self->{_State_} == 1) {
    return DefaultCurrent($self->{Parser});
  }
}

sub recognized_string {
  my $self = shift;
  if ($self->{_State_} == 1) {
    return RecognizedString($self->{Parser});
  }
}

sub original_string {
  my $self = shift;
  if ($self->{_State_} == 1) {
    return OriginalString($self->{Parser});
  }
}

sub current_line {
  my $self = shift;
  if ($self->{_State_} == 1) {
    return GetCurrentLineNumber($self->{Parser});
  }
}

sub current_column {
  my $self = shift;
  if ($self->{_State_} == 1) {
    return GetCurrentColumnNumber($self->{Parser});
  }
}

sub current_byte {
  my $self = shift;
  if ($self->{_State_} == 1) {
    return GetCurrentByteIndex($self->{Parser});
  }
}

# Return the current base; when NEWBASE is supplied (even as undef),
# install it as the new base.
sub base {
  my ($self, $newbase) = @_;
  my $p = $self->{Parser};
  my $oldbase = GetBase($p);
  SetBase($p, $newbase) if @_ > 1;
  return $oldbase;
}

# List of the names of the currently open elements, innermost last.
sub context {
  my $ctx = $_[0]->{Context};
  @$ctx;
}

# Name of the innermost open element, or undef when none is open.
sub current_element {
  my ($self) = @_;
  @{$self->{Context}} ? $self->{Context}->[-1] : undef;
}

# True if ELEMENT names the innermost open element (undef when no
# element is open).
sub in_element {
  my ($self, $element) = @_;
  @{$self->{Context}} ? $self->eq_name($self->{Context}->[-1], $element)
    : undef;
}

# Number of times ELEMENT appears in the context list.
sub within_element {
  my ($self, $element) = @_;
  my $cnt = 0;
  foreach my $open (@{$self->{Context}}) {
    $cnt++ if $self->eq_name($open, $element);
  }
  return $cnt;
}

# Size of the context list.
sub depth {
  my ($self) = @_;
  int(@{$self->{Context}});
}

sub element_index {
  my ($self) = @_;

  if ($self->{_State_} == 1) {
    return ElementIndex($self->{Parser});
  }
}

################
# Namespace methods

# Look up the namespace of NAME: the numeric value of NAME is used as an
# index into Namespace_List.  Warnings are silenced because int() is
# applied to values that may not look numeric.
sub namespace {
  my ($self, $name) = @_;
  local($^W) = 0;
  $self->{Namespace_List}->[int($name)];
}

# Two names are equal when both their numeric values (the namespace
# index used by namespace()) and their string values agree.
sub eq_name {
  my ($self, $nm1, $nm2) = @_;
  local($^W) = 0;

  int($nm1) == int($nm2) and $nm1 eq $nm2;
}

# Return NAME tied to the NAMESPACE uri, or plain NAME when NAMESPACE is
# false.
sub generate_ns_name {
  my ($self, $name, $namespace) = @_;

  $namespace ?
    GenerateNSName($name, $namespace, $self->{Namespace_Table},
                   $self->{Namespace_List})
      : $name;
}

sub new_ns_prefixes {
  my ($self) = @_;
  if ($self->{Namespaces}) {
    return @{$self->{New_Prefixes}};
  }
  return ();
}

# Uri currently bound to PREFIX (top of its binding stack), or undef.
sub expand_ns_prefix {
  my ($self, $prefix) = @_;

  if ($self->{Namespaces}) {
    my $stack = $self->{Prefix_Table}->{$prefix};
    return (defined($stack) and @$stack) ? $stack->[-1] : undef;
  }

  return undef;
}

# Currently bound prefixes; '#default' is omitted when its innermost
# binding is undef.
sub current_ns_prefixes {
  my ($self) = @_;

  if ($self->{Namespaces}) {
    my %set = %{$self->{Prefix_Table}};

    if (exists $set{'#default'} and not defined($set{'#default'}->[-1])) {
      delete $set{'#default'};
    }

    return keys %set;
  }

  return ();
}


################################################################
# Namespace declaration handlers
#

# Push URI on the binding stack of PREFIX ('#default' when PREFIX is
# undef) and record the prefix in New_Prefixes.
sub NamespaceStart {
  my ($self, $prefix, $uri) = @_;

  $prefix = '#default' unless defined $prefix;
  my $stack = $self->{Prefix_Table}->{$prefix};

  if (defined $stack) {
    push(@$stack, $uri);
  }
  else {
    $self->{Prefix_Table}->{$prefix} = [$uri];
  }

  # The New_Prefixes list gets emptied at end of startElement function
  # in Expat.xs

  push(@{$self->{New_Prefixes}}, $prefix);
}

# Pop the innermost binding of PREFIX; drop the table entry when the
# last binding goes away.
sub NamespaceEnd {
  my ($self, $prefix) = @_;

  $prefix = '#default' unless defined $prefix;

  my $stack = $self->{Prefix_Table}->{$prefix};
  # A prefix with no recorded binding has no stack; dereferencing undef
  # here would die under "strict refs", so treat it like the last binding.
  if (defined($stack) and @$stack > 1) {
    pop(@$stack);
  }
  else {
    delete $self->{Prefix_Table}->{$prefix};
  }
}

################

sub specified_attr {
  my $self = shift;

  if ($self->{_State_} == 1) {
    return GetSpecifiedAttributeCount($self->{Parser});
  }
}

# Unset all handlers on the parser while a parse is in progress.
sub finish {
  my ($self) = @_;
  if ($self->{_State_} == 1) {
    my $parser = $self->{Parser};
    UnsetAllHandlers($parser);
  }
}

# Return the document text around the current parse position with a
# "====^" pointer line inserted under the current column.  Returns ''
# when no context text is available and nothing outside of a parse.
sub position_in_context {
  my ($self, $lines) = @_;
  if ($self->{_State_} == 1) {
    my $parser = $self->{Parser};
    my ($string, $linepos) = PositionContext($parser, $lines);

    return '' unless defined($string);

    my $col = GetCurrentColumnNumber($parser);
    my $ptr = ('=' x ($col - 1)) . '^' . "\n";
    my $ret;
    my $dosplit = $linepos < length($string);

    $string .= "\n" unless $string =~ /\n$/;

    if ($dosplit) {
      $ret = substr($string, 0, $linepos) . $ptr
        . substr($string, $linepos);
    } else {
      $ret = $string . $ptr;
    }

    return $ret;
  }
}

# Return TEXT with '&' and '<' turned into entity references.  Each extra
# CHAR argument is escaped as well: '>', '"' and "'" get their predefined
# entity names, anything else a hexadecimal character reference.
# Croaks if a CHAR argument is not exactly one character long.
sub xml_escape {
  my $self = shift;
  my $text = shift;

  # Entity names only; the '&' and ';' are added when the replacement
  # table is built.
  my %entity_name = ('&' => 'amp', '<' => 'lt', '>' => 'gt',
                     '"' => 'quot', "'" => 'apos');

  # '&' and '<' are always escaped.
  my %replacement;
  foreach my $char ('&', '<') {
    $replacement{$char} = '&' . $entity_name{$char} . ';';
  }

  foreach my $char (@_) {
    # An empty string must be rejected too: an empty pattern would make
    # Perl reuse the last successful regex.
    croak "xml_escape: '$char' isn't a single character"
      unless defined($char) and length($char) == 1;

    next if exists $replacement{$char};

    $replacement{$char} = exists $entity_name{$char}
      ? '&' . $entity_name{$char} . ';'
      : '&#' . sprintf('x%X', ord($char)) . ';';
  }

  # One pass over the text, so the '&', '#', 'x' and ';' of replacements
  # already inserted are never escaped a second time.
  my $class = join('', map { quotemeta($_) } keys %replacement);
  $text =~ s/([$class])/$replacement{$1}/g;

  $text;
}

sub skip_until {
  my $self = shift;
  if ($self->{_State_} <= 1) {
    SkipUntil($self->{Parser}, $_[0]);
  }
}

sub release {
  my $self = shift;
  ParserRelease($self->{Parser});
}

sub DESTROY {
  my $self = shift;
  ParserFree($self->{Parser});
}

# Parse a document given either as a string or as an open filehandle
# (IO::Handle object, tied handle, or glob).  Croaks if this instance has
# already been used, and with ErrorMessage when the parse fails.
sub parse {
  my $self = shift;
  my $arg = shift;
  croak "Parse already in progress (Expat)" if $self->{_State_};
  $self->{_State_} = 1;
  my $parser = $self->{Parser};
  my $ioref;
  my $result = 0;

  if (defined $arg) {
    if (ref($arg) and UNIVERSAL::isa($arg, 'IO::Handle')) {
      $ioref = $arg;
    } elsif (tied($arg)) {
      my $class = ref($arg);
      no strict 'refs';
      $ioref = $arg if defined &{"${class}::TIEHANDLE"};
    }
    else {
      require IO::Handle;
      # Probe whether $arg names or is a glob with an IO slot; a plain
      # document string simply leaves $ioref undefined.
      eval {
        no strict 'refs';
        $ioref = *{$arg}{IO} if defined *{$arg};
      };
      undef $@;
    }
  }

  if (defined($ioref)) {
    my $delim = $self->{Stream_Delimiter};
    my $prev_rs;

    $prev_rs = ref($ioref)->input_record_separator("\n$delim\n")
      if defined($delim);

    $result = ParseStream($parser, $ioref, $delim);

    ref($ioref)->input_record_separator($prev_rs)
      if defined($delim);
  } else {
    $result = ParseString($parser, $arg);
  }

  $self->{_State_} = 2;
  $result or croak $self->{ErrorMessage};
}

sub parsestring {
  my $self = shift;
  $self->parse(@_);
}

# Open the named file in binary mode and parse it.
sub parsefile {
  my $self = shift;
  croak "Parser has already been used" if $self->{_State_};
  local(*FILE);
  # Explicit read mode: a file name such as "cmd |" or ">file" must be
  # treated as a plain name, never as a pipe or an output mode.
  open(FILE, '<', $_[0]) or croak "Couldn't open $_[0]:\n$!";
  binmode(FILE);
  my $ret = $self->parse(*FILE);
  close(FILE);
  $ret;
}

################################################################
package XML::Parser::ContentModel;
use overload '""' => \&asString, 'eq' => \&thiseq;

# Values of the Type field.
sub EMPTY  () {1}
sub ANY    () {2}
sub MIXED  () {3}
sub NAME   () {4}
sub CHOICE () {5}
sub SEQ    () {6}


sub isempty {
  return $_[0]->{Type} == EMPTY;
}

sub isany {
  return $_[0]->{Type} == ANY;
}

sub ismixed {
  return $_[0]->{Type} == MIXED;
}

sub isname {
  return $_[0]->{Type} == NAME;
}

sub name {
  return $_[0]->{Tag};
}

sub ischoice {
  return $_[0]->{Type} == CHOICE;
}

sub isseq {
  return $_[0]->{Type} == SEQ;
}

sub quant {
  return $_[0]->{Quant};
}

# Component particles as a list, or undef when there are none.
sub children {
  my $children = $_[0]->{Children};
  if (defined $children) {
    return @$children;
  }
  return undef;
}

# String form of the model, e.g. "EMPTY", "(#PCDATA|a|b)*" or "(a,b)+".
# This is also what the overloaded stringification returns.
sub asString {
  my ($self) = @_;
  my $ret;

  if ($self->{Type} == NAME) {
    $ret = $self->{Tag};
  }
  elsif ($self->{Type} == EMPTY) {
    return "EMPTY";
  }
  elsif ($self->{Type} == ANY) {
    return "ANY";
  }
  elsif ($self->{Type} == MIXED) {
    $ret = '(#PCDATA';
    foreach (@{$self->{Children}}) {
      $ret .= '|' . $_;
    }
    $ret .= ')';
  }
  else {
    my $sep = $self->{Type} == CHOICE ? '|' : ',';
    $ret = '(' . join($sep, map { $_->asString } @{$self->{Children}}) . ')';
  }

  $ret .= $self->{Quant} if $self->{Quant};
  return $ret;
}

# Overloaded 'eq': compare the string form against the other operand.
sub thiseq {
  my $self = shift;

  return $self->asString eq $_[0];
}

################################################################
package XML::Parser::ExpatNB;

use vars qw(@ISA);
use Carp;

@ISA = qw(XML::Parser::Expat);

# The blocking entry points are not available in this subclass.

sub parse {
  my $self = shift;
  my $class = ref($self);
  croak "parse method not supported in $class";
}

sub parsestring {
  my $self = shift;
  my $class = ref($self);
  croak "parsestring method not supported in $class";
}

sub parsefile {
  my $self = shift;
  my $class = ref($self);
  croak "parsefile method not supported in $class";
}

# Feed another chunk of the document to the parser; croaks with
# ErrorMessage on a parse error.
sub parse_more {
  my ($self, $data) = @_;

  $self->{_State_} = 1;
  my $ret = XML::Parser::Expat::ParsePartial($self->{Parser}, $data);

  croak $self->{ErrorMessage} unless $ret;
}

# Signal the end of the document.  On error the parser is released and
# the error message croaked.  On success the FinalHandler (if any) is
# called in the caller's context and its result returned; without a
# FinalHandler the ParseDone result is returned in scalar context and an
# empty list in list context.  The parser is released in every case.
sub parse_done {
  my $self = shift;

  my $ret = XML::Parser::Expat::ParseDone($self->{Parser});
  unless ($ret) {
    my $msg = $self->{ErrorMessage};
    $self->release;
    croak $msg;
  }

  $self->{_State_} = 2;

  my $result = $ret;
  my @result = ();
  my $final = $self->{FinalHandler};
  if (defined $final) {
    if (wantarray) {
      @result = $final->($self);
    }
    else {
      $result = $final->($self);
    }
  }

  $self->release;

  return unless defined wantarray;
  return wantarray ? @result : $result;
}

################################################################

package XML::Parser::Encinfo;

sub DESTROY {
  my $self = shift;
  XML::Parser::Expat::FreeEncoding($self);
}

1;

__END__

=head1 NAME

XML::Parser::Expat - Lowlevel access to James Clark's expat XML parser

=head1 SYNOPSIS

 use XML::Parser::Expat;

 $parser = new XML::Parser::Expat;
 $parser->setHandlers('Start' => \&sh,
                      'End'   => \&eh,
                      'Char'  => \&ch);
 open(FOO, 'info.xml') or die "Couldn't open";
 $parser->parse(*FOO);
 close(FOO);
 # $parser->parse('<foo id="me"> here <em>we</em> go </foo>');

 sub sh
 {
   my ($p, $el, %atts) = @_;
   $p->setHandlers('Char' => \&spec)
     if ($el eq 'special');
   ...
 }

 sub eh
 {
   my ($p, $el) = @_;
   $p->setHandlers('Char' => \&ch)  # Special elements won't contain
     if ($el eq 'special');         # other special elements
   ...
 }

=head1 DESCRIPTION

This module provides an interface to James Clark's XML parser, expat. As in
expat, a single instance of the parser can only parse one document. Calls
to parsestring after the first for a given instance will die.

Expat (and XML::Parser::Expat) are event based. As the parser recognizes
parts of the document (say the start or end of an XML element), then any
handlers registered for that type of an event are called with suitable
parameters.

=head1 METHODS

=over 4

=item new

This is a class method, the constructor for XML::Parser::Expat. Options are
passed as keyword value pairs. The recognized options are:

=over 4

=item * ProtocolEncoding

The protocol encoding name. The default is none. The expat built-in
encodings are: C<UTF-8>, C<ISO-8859-1>, C<UTF-16>, and C<US-ASCII>.
721 Other encodings may be used if they have encoding maps in one of the 722 directories in the @Encoding_Path list. Setting the protocol encoding 723 overrides any encoding in the XML declaration. 724 725 =item * Namespaces 726 727 When this option is given with a true value, then the parser does namespace 728 processing. By default, namespace processing is turned off. When it is 729 turned on, the parser consumes I<xmlns> attributes and strips off prefixes 730 from element and attributes names where those prefixes have a defined 731 namespace. A name's namespace can be found using the L<"namespace"> method 732 and two names can be checked for absolute equality with the L<"eq_name"> 733 method. 734 735 =item * NoExpand 736 737 Normally, the parser will try to expand references to entities defined in 738 the internal subset. If this option is set to a true value, and a default 739 handler is also set, then the default handler will be called when an 740 entity reference is seen in text. This has no effect if a default handler 741 has not been registered, and it has no effect on the expansion of entity 742 references inside attribute values. 743 744 =item * Stream_Delimiter 745 746 This option takes a string value. When this string is found alone on a line 747 while parsing from a stream, then the parse is ended as if it saw an end of 748 file. The intended use is with a stream of xml documents in a MIME multipart 749 format. The string should not contain a trailing newline. 750 751 =item * ErrorContext 752 753 When this option is defined, errors are reported in context. The value 754 of ErrorContext should be the number of lines to show on either side of 755 the line in which the error occurred. 756 757 =item * ParseParamEnt 758 759 Unless standalone is set to "yes" in the XML declaration, setting this to 760 a true value allows the external DTD to be read, and parameter entities 761 to be parsed and expanded. 
762 763 =item * Base 764 765 The base to use for relative pathnames or URLs. This can also be done by 766 using the base method. 767 768 =back 769 770 =item setHandlers(TYPE, HANDLER [, TYPE, HANDLER [...]]) 771 772 This method registers handlers for the various events. If no handlers are 773 registered, then a call to parsestring or parsefile will only determine if 774 the corresponding XML document is well formed (by returning without error.) 775 This may be called from within a handler, after the parse has started. 776 777 Setting a handler to something that evaluates to false unsets that 778 handler. 779 780 This method returns a list of type, handler pairs corresponding to the 781 input. The handlers returned are the ones that were in effect before the 782 call to setHandlers. 783 784 The recognized events and the parameters passed to the corresponding 785 handlers are: 786 787 =over 4 788 789 =item * Start (Parser, Element [, Attr, Val [,...]]) 790 791 This event is generated when an XML start tag is recognized. Parser is 792 an XML::Parser::Expat instance. Element is the name of the XML element that 793 is opened with the start tag. The Attr & Val pairs are generated for each 794 attribute in the start tag. 795 796 =item * End (Parser, Element) 797 798 This event is generated when an XML end tag is recognized. Note that 799 an XML empty tag (<foo/>) generates both a start and an end event. 800 801 There is always a lower level start and end handler installed that wrap 802 the corresponding callbacks. This is to handle the context mechanism. 803 A consequence of this is that the default handler (see below) will not 804 see a start tag or end tag unless the default_current method is called. 805 806 =item * Char (Parser, String) 807 808 This event is generated when non-markup is recognized. The non-markup 809 sequence of characters is in String. A single non-markup sequence of 810 characters may generate multiple calls to this handler. 
Whatever the 811 encoding of the string in the original document, this is given to the 812 handler in UTF-8. 813 814 =item * Proc (Parser, Target, Data) 815 816 This event is generated when a processing instruction is recognized. 817 818 =item * Comment (Parser, String) 819 820 This event is generated when a comment is recognized. 821 822 =item * CdataStart (Parser) 823 824 This is called at the start of a CDATA section. 825 826 =item * CdataEnd (Parser) 827 828 This is called at the end of a CDATA section. 829 830 =item * Default (Parser, String) 831 832 This is called for any characters that don't have a registered handler. 833 This includes both characters that are part of markup for which no 834 events are generated (markup declarations) and characters that 835 could generate events, but for which no handler has been registered. 836 837 Whatever the encoding in the original document, the string is returned to 838 the handler in UTF-8. 839 840 =item * Unparsed (Parser, Entity, Base, Sysid, Pubid, Notation) 841 842 This is called for a declaration of an unparsed entity. Entity is the name 843 of the entity. Base is the base to be used for resolving a relative URI. 844 Sysid is the system id. Pubid is the public id. Notation is the notation 845 name. Base and Pubid may be undefined. 846 847 =item * Notation (Parser, Notation, Base, Sysid, Pubid) 848 849 This is called for a declaration of notation. Notation is the notation name. 850 Base is the base to be used for resolving a relative URI. Sysid is the system 851 id. Pubid is the public id. Base, Sysid, and Pubid may all be undefined. 852 853 =item * ExternEnt (Parser, Base, Sysid, Pubid) 854 855 This is called when an external entity is referenced. Base is the base to be 856 used for resolving a relative URI. Sysid is the system id. Pubid is the public 857 id. Base, and Pubid may be undefined. 
858 859 This handler should either return a string, which represents the contents of 860 the external entity, or return an open filehandle that can be read to obtain 861 the contents of the external entity, or return undef, which indicates the 862 external entity couldn't be found and will generate a parse error. 863 864 If an open filehandle is returned, it must be returned as either a glob 865 (*FOO) or as a reference to a glob (e.g. an instance of IO::Handle). 866 867 =item * ExternEntFin (Parser) 868 869 This is called after an external entity has been parsed. It allows 870 applications to perform cleanup on actions performed in the above 871 ExternEnt handler. 872 873 =item * Entity (Parser, Name, Val, Sysid, Pubid, Ndata, IsParam) 874 875 This is called when an entity is declared. For internal entities, the Val 876 parameter will contain the value and the remaining three parameters will 877 be undefined. For external entities, the Val parameter 878 will be undefined, the Sysid parameter will have the system id, the Pubid 879 parameter will have the public id if it was provided (it will be undefined 880 otherwise), the Ndata parameter will contain the notation for unparsed 881 entities. If this is a parameter entity declaration, then the IsParam 882 parameter is true. 883 884 Note that this handler and the Unparsed handler above overlap. If both are 885 set, then this handler will not be called for unparsed entities. 886 887 =item * Element (Parser, Name, Model) 888 889 The element handler is called when an element declaration is found. Name is 890 the element name, and Model is the content model as an 891 XML::Parser::ContentModel object. See L<"XML::Parser::ContentModel Methods"> 892 for methods available for this class. 893 894 =item * Attlist (Parser, Elname, Attname, Type, Default, Fixed) 895 896 This handler is called for each attribute in an ATTLIST declaration. 
897 So an ATTLIST declaration that has multiple attributes 898 will generate multiple calls to this handler. The Elname parameter is the 899 name of the element with which the attribute is being associated. The Attname 900 parameter is the name of the attribute. Type is the attribute type, given as 901 a string. Default is the default value, which will either be "#REQUIRED", 902 "#IMPLIED" or a quoted string (i.e. the returned string will begin and end 903 with a quote character). If Fixed is true, then this is a fixed attribute. 904 905 =item * Doctype (Parser, Name, Sysid, Pubid, Internal) 906 907 This handler is called for DOCTYPE declarations. Name is the document type 908 name. Sysid is the system id of the document type, if it was provided, 909 otherwise it's undefined. Pubid is the public id of the document type, 910 which will be undefined if no public id was given. Internal will be 911 true or false, indicating whether or not the doctype declaration contains 912 an internal subset. 913 914 =item * DoctypeFin (Parser) 915 916 This handler is called after parsing of the DOCTYPE declaration has finished, 917 including any internal or external DTD declarations. 918 919 =item * XMLDecl (Parser, Version, Encoding, Standalone) 920 921 This handler is called for XML declarations. Version is a string containing 922 the version. Encoding is either undefined or contains an encoding string. 923 Standalone is either undefined, or true or false. Undefined indicates 924 that no standalone parameter was given in the XML declaration. True or 925 false indicates "yes" or "no" respectively. 926 927 =back 928 929 =item namespace(name) 930 931 Return the URI of the namespace that the name belongs to. If the name doesn't 932 belong to any namespace, an undef is returned. This is only valid on names 933 received through the Start or End handlers from a single document, or through 934 a call to the generate_ns_name method. 
In other words, don't use names 935 generated from one instance of XML::Parser::Expat with other instances. 936 937 =item eq_name(name1, name2) 938 939 Return true if name1 and name2 are identical (i.e. same name and from 940 the same namespace.) This is only meaningful if both names were obtained 941 through the Start or End handlers from a single document, or through 942 a call to the generate_ns_name method. 943 944 =item generate_ns_name(name, namespace) 945 946 Return a name, associated with a given namespace, good for using with the 947 above 2 methods. The namespace argument should be the namespace URI, not 948 a prefix. 949 950 =item new_ns_prefixes 951 952 When called from a start tag handler, returns namespace prefixes declared 953 with this start tag. If called elsewhere (or if there were no namespace 954 prefixes declared), it returns an empty list. Setting of the default 955 namespace is indicated with '#default' as a prefix. 956 957 =item expand_ns_prefix(prefix) 958 959 Return the uri to which the given prefix is currently bound. Returns 960 undef if the prefix isn't currently bound. Use '#default' to find the 961 current binding of the default namespace (if any). 962 963 =item current_ns_prefixes 964 965 Return a list of currently bound namespace prefixes. The order of the 966 prefixes in the list has no meaning. If the default namespace is 967 currently bound, '#default' appears in the list. 968 969 =item recognized_string 970 971 Returns the string from the document that was recognized in order to call 972 the current handler. For instance, when called from a start handler, it 973 will give us the start-tag string. The string is encoded in UTF-8. 974 This method doesn't return a meaningful string inside declaration handlers. 975 976 =item original_string 977 978 Returns the verbatim string from the document that was recognized in 979 order to call the current handler. The string is in the original document 980 encoding. 
This method doesn't return a meaningful string inside declaration 981 handlers. 982 983 =item default_current 984 985 When called from a handler, causes the sequence of characters that generated 986 the corresponding event to be sent to the default handler (if one is 987 registered). Use of this method is deprecated in favor of the recognized_string 988 method, which you can use without installing a default handler. This 989 method doesn't deliver a meaningful string to the default handler when 990 called from inside declaration handlers. 991 992 =item xpcroak(message) 993 994 Concatenate onto the given message the current line number within the 995 XML document plus the message implied by ErrorContext. Then croak with 996 the formed message. 997 998 =item xpcarp(message) 999 1000 Concatenate onto the given message the current line number within the 1001 XML document plus the message implied by ErrorContext. Then carp with 1002 the formed message. 1003 1004 =item current_line 1005 1006 Returns the line number of the current position of the parse. 1007 1008 =item current_column 1009 1010 Returns the column number of the current position of the parse. 1011 1012 =item current_byte 1013 1014 Returns the current position of the parse. 1015 1016 =item base([NEWBASE]); 1017 1018 Returns the current value of the base for resolving relative URIs. If 1019 NEWBASE is supplied, changes the base to that value. 1020 1021 =item context 1022 1023 Returns a list of element names that represent open elements, with the 1024 last one being the innermost. Inside start and end tag handlers, this 1025 will be the tag of the parent element. 1026 1027 =item current_element 1028 1029 Returns the name of the innermost currently opened element. Inside 1030 start or end handlers, returns the parent of the element associated 1031 with those tags. 1032 1033 =item in_element(NAME) 1034 1035 Returns true if NAME is equal to the name of the innermost currently opened 1036 element. 
If namespace processing is being used and you want to check 1037 against a name that may be in a namespace, then use the generate_ns_name 1038 method to create the NAME argument. 1039 1040 =item within_element(NAME) 1041 1042 Returns the number of times the given name appears in the context list. 1043 If namespace processing is being used and you want to check 1044 against a name that may be in a namespace, then use the generate_ns_name 1045 method to create the NAME argument. 1046 1047 =item depth 1048 1049 Returns the size of the context list. 1050 1051 =item element_index 1052 1053 Returns an integer that is the depth-first visit order of the current 1054 element. This will be zero outside of the root element. For example, 1055 this will return 1 when called from the start handler for the root element 1056 start tag. 1057 1058 =item skip_until(INDEX) 1059 1060 INDEX is an integer that represents an element index. When this method 1061 is called, all handlers are suspended until the start tag for an element 1062 that has an index number equal to INDEX is seen. If a start handler has 1063 been set, then this is the first tag that the start handler will see 1064 after skip_until has been called. 1065 1066 1067 =item position_in_context(LINES) 1068 1069 Returns a string that shows the current parse position. LINES should be 1070 an integer >= 0 that represents the number of lines on either side of the 1071 current parse line to place into the returned string. 1072 1073 =item xml_escape(TEXT [, CHAR [, CHAR ...]]) 1074 1075 Returns TEXT with markup characters turned into character entities. Any 1076 additional characters provided as arguments are also turned into character 1077 references where found in TEXT. 1078 1079 =item parse (SOURCE) 1080 1081 The SOURCE parameter should either be a string containing the whole XML 1082 document, or it should be an open IO::Handle. 
Only a single document 1083 may be parsed for a given instance of XML::Parser::Expat, so this will croak 1084 if it's been called previously for this instance. 1085 1086 =item parsestring(XML_DOC_STRING) 1087 1088 Parses the given string as an XML document. Only a single document may be 1089 parsed for a given instance of XML::Parser::Expat, so this will die if either 1090 parsestring or parsefile has been called for this instance previously. 1091 1092 This method is deprecated in favor of the parse method. 1093 1094 =item parsefile(FILENAME) 1095 1096 Parses the XML document in the given file. Will die if parsestring or 1097 parsefile has been called previously for this instance. 1098 1099 =item is_defaulted(ATTNAME) 1100 1101 NO LONGER WORKS. To find out if an attribute is defaulted please use 1102 the specified_attr method. 1103 1104 =item specified_attr 1105 1106 When the start handler receives lists of attributes and values, the 1107 non-defaulted (i.e. explicitly specified) attributes occur in the list 1108 first. This method returns the number of specified items in the list. 1109 So if this number is equal to the length of the list, there were no 1110 defaulted values. Otherwise the number points to the index of the 1111 first defaulted attribute name. 1112 1113 =item finish 1114 1115 Unsets all handlers (including internal ones that set context), but expat 1116 continues parsing to the end of the document or until it finds an error. 1117 It should finish up a lot faster than with the handlers set. 1118 1119 =item release 1120 1121 There are data structures used by XML::Parser::Expat that have circular 1122 references. This means that these structures will never be garbage 1123 collected unless these references are explicitly broken. Calling this 1124 method breaks those references (and makes the instance unusable.) 
1125 1126 Normally, higher level calls handle this for you, but if you are using 1127 XML::Parser::Expat directly, then it's your responsibility to call it. 1128 1129 =back 1130 1131 =head2 XML::Parser::ContentModel Methods 1132 1133 The element declaration handlers are passed objects of this class as the 1134 content model of the element declaration. They also represent content 1135 particles, components of a content model. 1136 1137 When referred to as a string, these objects are automagically converted to a 1138 string representation of the model (or content particle). 1139 1140 =over 4 1141 1142 =item isempty 1143 1144 This method returns true if the object is "EMPTY", false otherwise. 1145 1146 =item isany 1147 1148 This method returns true if the object is "ANY", false otherwise. 1149 1150 =item ismixed 1151 1152 This method returns true if the object is "(#PCDATA)" or "(#PCDATA|...)*", 1153 false otherwise. 1154 1155 =item isname 1156 1157 This method returns true if the object is an element name. 1158 1159 =item ischoice 1160 1161 This method returns true if the object is a choice of content particles. 1162 1163 1164 =item isseq 1165 1166 This method returns true if the object is a sequence of content particles. 1167 1168 =item quant 1169 1170 This method returns undef or a string representing the quantifier 1171 ('?', '*', '+') associated with the model or particle. 1172 1173 =item children 1174 1175 This method returns undef or (for mixed, choice, and sequence types) 1176 an array of component content particles. There will always be at least 1177 one component for choices and sequences, but for a mixed content model 1178 of pure PCDATA, "(#PCDATA)", undef is returned. 1179 1180 =back 1181 1182 =head2 XML::Parser::ExpatNB Methods 1183 1184 The class XML::Parser::ExpatNB is a subclass of XML::Parser::Expat used 1185 for non-blocking access to the expat library.
It does not support the parse, 1186 parsestring, or parsefile methods, but it does have these additional methods: 1187 1188 =over 4 1189 1190 =item parse_more(DATA) 1191 1192 Feed expat more text to munch on. 1193 1194 =item parse_done 1195 1196 Tell expat that it's gotten the whole document. 1197 1198 =back 1199 1200 =head1 FUNCTIONS 1201 1202 =over 4 1203 1204 =item XML::Parser::Expat::load_encoding(ENCODING) 1205 1206 Load an external encoding. ENCODING is either the name of an encoding or 1207 the name of a file. The basename is converted to lowercase and a '.enc' 1208 extension is appended unless there's one already there. Then, unless 1209 it's an absolute pathname (i.e. begins with '/'), the first file by that 1210 name discovered in the @Encoding_Path path list is used. 1211 1212 The encoding in the file is loaded and kept in the %Encoding_Table 1213 table. Earlier encodings of the same name are replaced. 1214 1215 This function is automatically called by expat when it encounters an encoding 1216 it doesn't know about. Expat shouldn't call this twice for the same 1217 encoding name. The only reason users should use this function is to 1218 explicitly load an encoding not contained in the @Encoding_Path list. 1219 1220 =back 1221 1222 =head1 AUTHORS 1223 1224 Larry Wall <F<larry@wall.org>> wrote version 1.0. 1225 1226 Clark Cooper <F<coopercc@netheaven.com>> picked up support, changed the API 1227 for this version (2.x), provided documentation, and added some standard 1228 package features. 1229 1230 =cut
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Tue Mar 17 22:47:18 2015 | Cross-referenced by PHPXref 0.7.1 |