# XML::TreePP - phpMan

## NAME
    [XML::TreePP] -- Pure Perl implementation for parsing/writing XML
    documents

## SYNOPSIS
    parse an XML document from file into hash tree:

        use XML::TreePP;
        my $tpp = XML::TreePP->new();
        my $tree = $tpp->parsefile( "index.rdf" );
        print "Title: ", $tree->{"rdf:RDF"}->{item}->[0]->{title}, "\n";
        print "URL:   ", $tree->{"rdf:RDF"}->{item}->[0]->{link}, "\n";

    write an XML document as string from hash tree:

        use XML::TreePP;
        my $tpp = XML::TreePP->new();
        my $tree = { rss => { channel => { item => [ {
            title   => "The Perl Directory",
            link    => "http://www.perl.org/",
        }, {
            title   => "The Comprehensive Perl Archive Network",
            link    => "http://cpan.perl.org/",
        } ] } } };
        my $xml = $tpp->write( $tree );
        print $xml;

    get a remote XML document by HTTP-GET and parse it into hash tree:

        use XML::TreePP;
        my $tpp = XML::TreePP->new();
        my $tree = $tpp->parsehttp( GET => "http://use.perl.org/index.rss" );
        print "Title: ", $tree->{"rdf:RDF"}->{channel}->{title}, "\n";
        print "URL:   ", $tree->{"rdf:RDF"}->{channel}->{link}, "\n";

    get a remote XML document by HTTP-POST and parse it into hash tree:

        use XML::TreePP;
        my $tpp = XML::TreePP->new( force_array => [qw( item )] );
        my $cgiurl = "http://search.hatena.ne.jp/keyword";
        my $keyword = "ajax";
        my $cgiquery = "mode=rss2&word=".$keyword;
        my $tree = $tpp->parsehttp( POST => $cgiurl, $cgiquery );
        print "Link: ", $tree->{rss}->{channel}->{item}->[0]->{link}, "\n";
        print "Desc: ", $tree->{rss}->{channel}->{item}->[0]->{description}, "\n";

## DESCRIPTION
    The [XML::TreePP] module parses an XML document and expands it into a
    hash tree. It can also do the reverse and generate an XML document from
    a hash tree. This is a pure Perl implementation and does not depend on
    any other modules. It can also fetch and parse an XML document from a
    remote web server, like the XMLHttpRequest object does in JavaScript.

## EXAMPLES
  Parse XML file
    Sample XML document:

        <?xml version="1.0" encoding="UTF-8"?>
        <family name="Kawasaki">
            <father>Yasuhisa</father>
            <mother>Chizuko</mother>
            <children>
                <girl>Shiori</girl>
                <boy>Yusuke</boy>
                <boy>Kairi</boy>
            </children>
        </family>

    Sample program to read a xml file and dump it:

        use XML::TreePP;
        use Data::Dumper;
        my $tpp = XML::TreePP->new();
        my $tree = $tpp->parsefile( "family.xml" );
        my $text = Dumper( $tree );
        print $text;

    Result dumped:

        $VAR1 = {
            'family' => {
                '-name' => 'Kawasaki',
                'father' => 'Yasuhisa',
                'mother' => 'Chizuko',
                'children' => {
                    'girl' => 'Shiori',
                    'boy' => [
                        'Yusuke',
                        'Kairi'
                    ]
                }
            }
        };

    Details:

        print $tree->{family}->{father};        # the father's given name.

    The prefix '-' is added on every attribute's name.

        print $tree->{family}->{"-name"};       # the family name of the family

    The array is used because the family has two boys.

        print $tree->{family}->{children}->{boy}->[1];  # The second boy's name
        print $tree->{family}->{children}->{girl};      # The girl's name

  Text node and attributes:
    If an element has both a text node and attributes, or both a text node
    and other child nodes, the value of the text node is moved to "#text",
    like the child nodes.

        use XML::TreePP;
        use Data::Dumper;
        my $tpp = XML::TreePP->new();
        my $source = '<span class="author">Kawasaki Yusuke</span>';
        my $tree = $tpp->parse( $source );
        my $text = Dumper( $tree );
        print $text;

    The result dumped is following:

        $VAR1 = {
            'span' => {
                '-class' => 'author',
                '#text'  => 'Kawasaki Yusuke'
            }
        };

    The special node name "#text" is used because this element has
    attribute(s) in addition to the text node. See also the "text_node_key"
    option.

## METHODS
  new
    This constructor method returns a new [XML::TreePP] object with %options.

        $tpp = [XML::TreePP]->new( %options );

  set
    This method sets an option value for "option_name". If $option_value is
    not defined, the option is deleted.

        $tpp->set( option_name => $option_value );

    See OPTIONS section below for details.

  get
    This method returns a current option value for "option_name".

        $tpp->get( 'option_name' );

  parse
    This method reads an XML document by string and returns a hash tree
    converted. The first argument is a scalar or a reference to a scalar.

            $tree = $tpp->parse( $source );

  parsefile
    This method reads an XML document by file and returns a hash tree
    converted. The first argument is a filename.

        $tree = $tpp->parsefile( $file );

  parsehttp
    This method receives an XML document from a remote server via HTTP and
    returns a hash tree converted.

        $tree = $tpp->parsehttp( $method, $url, $body, $head );

    $method is the HTTP request method: GET/POST/PUT/DELETE. $url is the
    URI of an XML file. $body is the request body when you use the POST
    method. $head is the request headers as a hash ref. The
    [LWP::UserAgent] module or the [HTTP::Lite] module is required to fetch
    a file.

        ( $tree, $xml, $code ) = $tpp->parsehttp( $method, $url, $body, $head );

    In array context, this method also returns the raw XML document received
    and the HTTP response's status code.

  write
    This method parses a hash tree and returns an XML document as a string.

        $source = $tpp->write( $tree, $encode );

    $tree is a reference to a hash tree.

  writefile
    This method parses a hash tree and writes an XML document into a file.

        $tpp->writefile( $file, $tree, $encode );

    $file is a filename to create. $tree is a reference to a hash tree.

## OPTIONS FOR PARSING XML
    This module accepts option parameters following:

  force_array
    This option allows you to specify a list of element names which should
    always be forced into an array representation.

        $tpp->set( force_array => [ 'rdf:li', 'item', '-xmlns' ] );

    The default value is null, which means that the context of each element
    determines whether it becomes an array or stays a scalar or hash. Note
    that the special wildcard name '*' means all elements.

  force_hash
    This option allows you to specify a list of element names which should
    always be forced into a hash representation.

        $tpp->set( force_hash => [ 'item', 'image' ] );

    The default value is null, which means that the context of each element
    determines whether it becomes a hash or stays a scalar text node. See
    also the "text_node_key" option below. Note that the special wildcard
    name '*' means all elements.

  cdata_scalar_ref
    This option allows you to convert a cdata section into a reference for
    scalar on parsing an XML document.

        $tpp->set( cdata_scalar_ref => 1 );

    The default value is false, it means that each cdata section is
    converted into a scalar.

  user_agent
    This option allows you to specify a HTTP_USER_AGENT string which is used
    by parsehttp() method.

        $tpp->set( user_agent => 'Mozilla/4.0 (compatible; ...)' );

    The default string is 'XML-TreePP/#.##', where '#.##' is substituted
    with the version number of this library.

  http_lite
    This option forces parsehttp() method to use a [HTTP::Lite] instance.

        my $http = [HTTP::Lite]->new();
        $tpp->set( http_lite => $http );

  lwp_useragent
    This option forces parsehttp() method to use a [LWP::UserAgent] instance.

        my $ua = [LWP::UserAgent]->new();
        $ua->timeout( 60 );
        $ua->env_proxy;
        $tpp->set( lwp_useragent => $ua );

    You may use this with [LWP::UserAgent::WithCache].

  base_class
    This option blesses each element's hashref into a class. Each class is
    named as a direct child class of its parent's class.

        $tpp->set( base_class => 'MyElement' );
        my $xml  = '<root><parent><child key="val">text</child></parent></root>';
        my $tree = $tpp->parse( $xml );
        print ref $tree->{root}->{parent}->{child}, "\n";

    A hash for <child> element above is blessed to
    "[MyElement::root::parent::child]" class. You may use this with
    [Class::Accessor].

  elem_class
    This blesses class name for each element's hashref. Each class is named
    horizontally under the direct child of "MyElement".

        $tpp->set( elem_class => 'MyElement' );
        my $xml  = '<root><parent><child key="val">text</child></parent></root>';
        my $tree = $tpp->parse( $xml );
        print ref $tree->{root}->{parent}->{child}, "\n";

    A hash for <child> element above is blessed to "[MyElement::child]" class.

  xml_deref
    This option dereferences the numeric character references, like &#xEB;,
    &#28450;, etc., in an XML document when this value is true.

        $tpp->set( xml_deref => 1 );

    Note that, for security reasons and for your convenience, this module
    dereferences the predefined character entity references (&amp;, &lt;,
    &gt;, &apos; and &quot;) and the numeric character references up to
    U+007F by default, even without xml_deref.

  require_xml_decl
    This option requires XML declaration at the top of XML document to
    parse.

        $tpp->set( require_xml_decl => 1 );

    This will die when the <?xml ... ?> declaration is not found.

## OPTIONS FOR WRITING XML
  first_out
    This option allows you to specify a list of element/attribute names
    which should always appear first in the output XML document.

        $tpp->set( first_out => [ 'link', 'title', '-type' ] );

    The default value is null, it means alphabetical order is used.

  last_out
    This option allows you to specify a list of element/attribute names
    which should always appear last in the output XML document.

        $tpp->set( last_out => [ 'items', 'item', 'entry' ] );

  indent
    This makes the output more human readable by indenting appropriately.

        $tpp->set( indent => 2 );

    This doesn't strictly follow the XML specification but does look nice.

  xml_decl
    By default, this module inserts an XML declaration at the top of the
    generated XML document. This option lets you replace it with another
    declaration or remove it entirely.

        $tpp->set( xml_decl => '' );

  output_encoding
    This option allows you to specify the encoding of the XML document
    generated by the write/writefile methods.

        $tpp->set( output_encoding => 'UTF-8' );

    On Perl 5.8.0 and later, you can select any encoding supported by
    Encode.pm. On Perl 5.6.x and before with Jcode.pm, you can use
    "Shift_JIS", "EUC-JP", "ISO-2022-JP" and "UTF-8". The default value
    is "UTF-8", which is the recommended encoding.

  empty_element_tag_end
        $tpp->set( empty_element_tag_end => '>' );

    Set characters which close empty tag. The default value is ' />'.

## OPTIONS FOR BOTH
  utf8_flag
    This turns the utf8 flag on for every element value parsed, and for the
    generated XML document as well.

        $tpp->set( utf8_flag => 1 );

    Perl 5.8.1 or later is required to use this.

  attr_prefix
    This option allows you to specify the prefix character(s) inserted
    before each attribute name.

        $tpp->set( attr_prefix => '@' );

    The default character is '-'. Or set '@' to access attribute values like
    E4X, ECMAScript for XML. Zero-length prefix '' is available as well, it
    means no prefix is added.

  text_node_key
    This option allows you to specify a hash key for text nodes.

        $tpp->set( text_node_key => '#text' );

    The default key is "#text".

  ignore_error
    By default, this module calls the [Carp::croak] function on an error.
    With this option set, errors are ignored and the method just returns.

        $tpp->set( ignore_error => 1 );

  use_ixhash
    This option preserves the order in which elements appear in the XML.
    The [Tie::IxHash] module is required.

        $tpp->set( use_ixhash => 1 );

    This makes parsing performance slow. (about 100% slower than default)

## AUTHOR
    Yusuke Kawasaki, <http://www.kawa.net/>

## REPOSITORY
    <https://github.com/kawanet/XML-TreePP>

## COPYRIGHT
    The following copyright notice applies to all the files provided in this
    distribution, including binary files, unless explicitly noted otherwise.

    Copyright 2006-2010 Yusuke Kawasaki

## LICENSE
    This library is free software; you can redistribute it and/or modify it
    under the same terms as Perl itself.

