WWW::CurlOO examples

WWW::CurlOO::examples -- sample modules and test code for WWW::CurlOO

Curl::Transport

Extracted from examples/01-curl-transport.pl

This module shows:

buildtime version check

Required features will be missing if libcurl was too old at WWW::CurlOO compilation.

basic inheritance

Use WWW::Curl::* as base for your modules.

exception handling

Most methods die() with a dualvar exception on error. You can compare them numerically, or display as part of a message.

Motivation

recv() and send() methods use non-blocking transfer, this may be very annoying in simple scripts. This wrapper implements blocking send() wrapper, and two recv() wrappers called read() and readline().

MODULE CODE

package Curl::Transport;

use strict;
use warnings;
use WWW::CurlOO::Easy qw(/^CURLE_/);
use base qw(WWW::CurlOO::Easy);

BEGIN {
    if ( WWW::CurlOO::LIBCURL_VERSION_NUM() < 0x071202 ) {
        my $ver = WWW::CurlOO::LIBCURL_VERSION();
        die "curl $ver does not support send() and recv()";
    }
    # alternatively you can write:
    if ( not WWW::CurlOO::Easy->can( "send" )
            or not WWW::CurlOO::Easy->can( "recv" ) ) {
        die "WWW::CurlOO is missing send() and recv()\n"
    }
}

use constant {
    B_URI => 0,
    B_SOCKET => 1,
    B_VEC => 2,
    B_READBUF => 3,
};


# new( URL ) -- get new object
sub new
{
    my $class = shift;
    my $uri = shift;

    # use an array as our object base
    my $base = [ $uri, undef, undef, '' ];

    my $self = $class->SUPER::new( $base );

    $self->setopt( WWW::CurlOO::Easy::CURLOPT_URL, $uri );
    $self->setopt( WWW::CurlOO::Easy::CURLOPT_CONNECT_ONLY, 1 );

    # will die if fails
    $self->perform();

    $self->[ B_SOCKET ] = $self->getinfo( WWW::CurlOO::Easy::CURLINFO_LASTSOCKET );

    # prepare select vector
    my $vec = '';
    vec( $vec, $self->[ B_SOCKET ], 1 ) = 1;
    $self->[ B_VEC ] = $vec;

    return $self;
}

# send( DATA ) -- send some data, wait for socket availability if it cannot
# be sent all at once
sub send($$)
{
    my $self = shift;
    my $data = shift;

    while ( length $data ) {
        # copy, because select overwrites those values
        my $w = $self->[ B_VEC ];

        # wait for write
        select undef, $w, undef, 0;

        # make sure some write bit is set
        next unless vec( $w, $self->[ B_SOCKET ], 1 );

        # actually send the data
        my $sent = $self->SUPER::send( $data );

        # remove from buffer what we sent
        substr $data, 0, $sent, '';
    };
}

# read( SIZE ) -- read SIZE bytes, wait for more data if there wasn't enough
sub read($$)
{
    my $self = shift;
    my $size = shift;

    return '' unless $size > 0;

    while ( length $self->[ B_READBUF ] < $size ) {
        my $r = $self->[ B_VEC ];

        # wait for data
        select $r, undef, undef, 0;

        # make sure some read bit is set
        redo unless vec( $r, $self->[ B_SOCKET ], 1 );

        eval {
            my $l = $self->SUPER::recv( $self->[ B_READBUF ],
                $size - length $self->[ B_READBUF ] );
        };
        if ( $@ ) {
            if ( $@ == CURLE_UNSUPPORTED_PROTOCOL ) {
                my $uri = $self->[ B_URI ];
                warn "Connection to $uri closed: $@\n";
                last;
            } elsif ( $@ == CURLE_AGAIN ) {
                warn "nothing to read, this should not happen";
            } else {
                die $@;
            }
        }
    }

    return substr $self->[ B_READBUF ], 0, $size, '';
}

# readline() -- read until $/
sub readline($)
{
    my $self = shift;

    # we allow changing $/, but we don't support $/ = undef.
    local $/;
    $/ = "\n" unless defined $/;

    my $idx;
    until ( ( $idx = index $self->[ B_READBUF ], $/ ) >= 0 ) {
        my $r = $self->[ B_VEC ];

        # wait for data
        select $r, undef, undef, 0;

        # make sure some read bit is set
        next unless vec( $r, $self->[ B_SOCKET ], 1 );

        # read 256 bytes, should be enough in most cases
        eval {
            $self->SUPER::recv( $self->[ B_READBUF ], 256 );
        };
        if ( $@ ) {
            if ( $@ == CURLE_UNSUPPORTED_PROTOCOL ) {
                my $uri = $self->[ B_URI ];
                warn "Connection to $uri closed: $@\n";
                last;
            } elsif ( $@ == CURLE_AGAIN ) {
                warn "nothing to read, this should not happen";
            } else {
                die $@;
            }
        }
    }

    return substr $self->[ B_READBUF ], 0, ($idx + length $/), '';
}

1;

TEST APPLICATION

Sample application using this module could look like this:

#!perl
use strict;
use warnings;
use Curl::Transport;

my $host = shift @ARGV || "example.com";

my $t = Curl::Transport->new( "http://$host" );
$t->send( "GET / HTTP/1.0\r\n" );
$t->send( "User-Agent: Curl::Transport test\r\n" );
$t->send( "Accept: */*\r\n" );
$t->send( "Host: $host\r\n" );
$t->send( "Connection: Close\r\n" );
$t->send( "\r\n" );

my $length;
{
    local $/ = "\r\n";
    local $_;
    do {
        $_ = $t->readline();
        $length = 0 | $1 if /Content-Length:\s*(\d+)/;
        chomp;
        print "HEADER: $_\n";
    } while ( length $_ );
}

if ( defined $length ) {
    print "Reading $length bytes of data:\n";
    print $t->read( $length );

    print "\nTrying to read one more byte, should fail:\n";
    print $t->read( 1 );
    print "\n";
} else {
    print "Don't know how much to read\n";
    while ( $_ = $t->readline() ) {
        print;
    }
}

printf "Last error: %s\n", $t->error();

Multi::Simple

Extracted from examples/02-multi-simple.pl

This module shows how to use WWW::CurlOO::Multi interface correctly in its simpliest form. Uses perl builtin select(). A more advanced code would use callbacks and some event library instead.

Motivation

Writing a proper multi wrapper code requires a rather good understainding of libcurl multi interface. This code provides a recipie for those who just need something that "simply works".

MODULE CODE

package Multi::Simple;

use strict;
use warnings;
use WWW::CurlOO::Multi;
use base qw(WWW::CurlOO::Multi);

# make new object, preset the data
sub new
{
    my $class = shift;
    my $active = 0;
    return $class->SUPER::new( \$active );
}

# add one handle and count it
sub add_handle($$)
{
    my $self = shift;
    my $easy = shift;

    $$self++;
    $self->SUPER::add_handle( $easy );
}

# perform until some handle finishes, does all the magic needed to make it
# efficient (check as soon as there is some data) without overusing the cpu.
sub get_one($)
{
    my $self = shift;

    if ( my @result = $self->info_read() ) {
        $self->remove_handle( $result[ 1 ] );
        return @result;
    }

    while ( $$self ) {
        my $t = $self->timeout;
        if ( $t != 0 ) {
            $t = 10000 if $t < 0;
            my ( $r, $w, $e ) = $self->fdset;

            select $r, $w, $e, $t / 1000;
        }

        my $ret = $self->perform();
        if ( $$self != $ret ) {
            $$self = $ret;
            if ( my @result = $self->info_read() ) {
                $self->remove_handle( $result[ 1 ] );
                return @result;
            }
        }
    };

    return ();
}

1;

TEST APPLICATION

Sample application using this module looks like this:

#!perl
use strict;
use warnings;
use Multi::Simple;

sub easy
{
    my $uri = shift;

    require WWW::CurlOO::Easy;

    my $easy = WWW::CurlOO::Easy->new( { uri => $uri, body => '' } );
    $easy->setopt( WWW::CurlOO::Easy::CURLOPT_URL(), $uri );
    $easy->setopt( WWW::CurlOO::Easy::CURLOPT_WRITEHEADER(), \$easy->{headers} );
    $easy->setopt( WWW::CurlOO::Easy::CURLOPT_FILE(), \$easy->{body} );
    return $easy;
}

my $multi = Multi::Simple->new();

$multi->add_handle( easy( "http://www.google.com/search?q=perl" ) );
$multi->add_handle( easy( "http://example.com/1" ) );
$multi->add_handle( easy( "http://example.com/2" ) );
$multi->add_handle( easy( "http://example.com/3" ) );
$multi->add_handle( easy( "http://example.com/4" ) );
$multi->add_handle( easy( "http://example.com/5" ) );
$multi->add_handle( easy( "http://example.com/6" ) );

my $ret = 0;
while ( my ( $msg, $easy, $result ) = $multi->get_one() ) {
    print "\nFinished downloading $easy->{uri}: $result:\n";
    print $easy->{headers};
    printf "Body is %d bytes long\n", length $easy->{body};

    $ret = 1 if $result;
}

exit $ret;

Multi::Event

Extracted from examples/03-multi-event.pl

This module shows how to use WWW::CurlOO::Multi interface with an event library, AnyEvent in this case.

Motivation

This is the most efficient method for using WWW::CurlOO::Multi interface, but it requires a really good understanding of it. This code tries to show the quirks found when using event-based programming.

MODULE CODE

package Multi::Event;

use strict;
use warnings;
use AnyEvent;
use WWW::CurlOO::Multi qw(/^CURL_POLL_/ /^CURL_CSELECT_/);
use base qw(WWW::CurlOO::Multi);

BEGIN {
    if ( not WWW::CurlOO::Multi->can( 'CURLMOPT_TIMERFUNCTION' ) ) {
        die "WWW::CurlOO::Multi is missing timer callback,\n" .
            "rebuild WWW::CurlOO with libcurl 7.16.0 or newer\n";
    }
}

sub new
{
    my $class = shift;

    # no base object this time
    # we'll use the default hash

    my $multi = $class->SUPER::new();

    $multi->setopt( WWW::CurlOO::Multi::CURLMOPT_SOCKETFUNCTION,
        \&_cb_socket );
    $multi->setopt( WWW::CurlOO::Multi::CURLMOPT_TIMERFUNCTION,
        \&_cb_timer );

    $multi->{active} = -1;

    return $multi;
}


# socket callback:
# will be called by curl any time events on some socket must be updated
sub _cb_socket
{
    my ( $easy, $socket, $poll ) = @_;
    #warn "on_socket( $socket => $poll )\n";

    # socket callback receives the $easy handle as first argument.
    # Right now $socket belongs to that $easy, but it can be
    # shared with another easy handle if server supports persistent
    # connections.
    # This is why we register socket events inside multi object
    # and not $easy.

    my $multi = $easy->multi;

    # deregister old io events
    delete $multi->{ "r$socket" };
    delete $multi->{ "w$socket" };

    # AnyEvent does not support registering a socket for both reading and
    # writing. This is rarely used so there is no harm in separating
    # the events.

    # register read event
    if ( $poll == CURL_POLL_IN or $poll == CURL_POLL_INOUT ) {
        $multi->{ "r$socket" } = AE::io $socket, 0, sub {
            $multi->socket_action( $socket, CURL_CSELECT_IN );
        };
    }

    # register write event
    if ( $poll == CURL_POLL_OUT or $poll == CURL_POLL_INOUT ) {
        $multi->{ "w$socket" } = AE::io $socket, 1, sub {
            $multi->socket_action( $socket, CURL_CSELECT_OUT );
        };
    }

    return 1;
}


# timer callback:
# It triggers timeout update. Timeout value tells us how soon socket_action
# must be called if there were no actions on sockets. This will allow
# curl to trigger timeout events.
sub _cb_timer
{
    my ( $multi, $timeout_ms ) = @_;
    #warn "on_timer( $timeout_ms )\n";

    # deregister old timer
    delete $multi->{timer};

    my $cb = sub {
        $multi->socket_action( WWW::CurlOO::Multi::CURL_SOCKET_TIMEOUT );
    };

    if ( $timeout_ms < 0 ) {
        # Negative timeout means there is no timeout at all. Normally happens
        # if there are no handles anymore.
        #
        # However, curl_multi_timeout(3) says:
        #
        # Note: if libcurl returns a -1 timeout here, it just means that
        # libcurl currently has no stored timeout value. You must not wait
        # too long (more than a few seconds perhaps) before you call
        # curl_multi_perform() again.

        # XXX: this is missing yet
        #if ( $multi->handles ) {
            $multi->{timer} = AE::timer 10, 10, $cb;
        #}
    } else {
        # This will trigger timeouts if there are any.
        $multi->{timer} = AE::timer $timeout_ms / 1000, 0, $cb;
    }

    return 1;
}

# add one handle and kickstart download
sub add_handle($$)
{
    my $multi = shift;
    my $easy = shift;

    die "easy cannot finish()\n"
        unless $easy->can( 'finish' );

    # Calling socket_action with default arguments will trigger socket callback
    # and register IO events.
    #
    # It _must_ be called _after_ add_handle(); AE will take care of that.
    #
    # We are delaying the call because in some cases socket_action may finish
    # inmediatelly (i.e. there was some error or we used persistent connections
    # and server returned data right away) and it could confuse our
    # application -- it would appear to have finished before it started.
    AE::timer 0, 0, sub {
        $multi->socket_action();
    };

    $multi->SUPER::add_handle( $easy );
}

# perform and call any callbacks that have finished
sub socket_action
{
    my $multi = shift;

    my $active = $multi->SUPER::socket_action( @_ );
    return if $multi->{active} == $active;

    $multi->{active} = $active;

    while ( my ( $msg, $easy, $result ) = $multi->info_read() ) {
        if ( $msg == WWW::CurlOO::Multi::CURLMSG_DONE ) {
            $multi->remove_handle( $easy );
            $easy->finish( $result );
        } else {
            die "I don't know what to do with message $msg.\n";
        }
    }
}

1;

TEST Easy package

Multi::Event requires Easy object to provide finish() method.

package Easy::Event;
use strict;
use warnings;
use WWW::CurlOO::Easy qw(/^CURLOPT_/);
use base qw(WWW::CurlOO::Easy);

sub new
{
    my $class = shift;
    my $uri = shift;
    my $cb = shift;

    my $easy = $class->SUPER::new( { uri => $uri, body => '', cb => $cb } );
    $easy->setopt( CURLOPT_URL, $uri );
    $easy->setopt( CURLOPT_WRITEHEADER, \$easy->{headers} );
    $easy->setopt( CURLOPT_FILE, \$easy->{body} );

    return $easy;
}

sub finish
{
    my ( $easy, $result ) = @_;

    printf "\nFinished downloading %s: %s: %d bytes\n",
        $easy->{uri}, $result, length $easy->{body};

    $easy->{cb}->( $easy->{body} );
}

1;

TEST APPLICATION

#!perl
use strict;
use warnings;
use Easy::Event;
use Multi::Event;
use AnyEvent;

my $multi = Multi::Event->new();
my $cv = AE::cv;


my @uris = (
    "http://www.google.com/search?q=perl",
    "http://www.google.com/search?q=curl",
    "http://www.google.com/search?q=perl+curl",
);


my $i = scalar @uris;
sub done
{
    my $body = shift;

    # process...

    unless ( --$i ) {
        $cv->send;
    }
}

my $timer;
$timer = AE::timer 0, 0.1, sub {
    my $uri = shift @uris;
    $multi->add_handle( Easy::Event->new( $uri, \&done ) );

    unless ( @uris ) {
        undef $timer;
    }
};

$cv->recv;

exit 0;