WWW::CurlOO examples
WWW::CurlOO::examples -- sample modules and test code for WWW::CurlOO
Curl::Transport
Extracted from examples/01-curl-transport.pl
This module shows:
- buildtime version check
-
Required features will be missing if libcurl was too old at WWW::CurlOO compilation.
- basic inheritance
-
Use WWW::Curl::* as base for your modules.
- exception handling
-
Most methods die() with a dualvar exception on error. You can compare them numerically, or display as part of a message.
Motivation
recv() and send() methods use non-blocking transfer, this may be very annoying in simple scripts. This wrapper implements blocking send() wrapper, and two recv() wrappers called read() and readline().
MODULE CODE
package Curl::Transport;
use strict;
use warnings;
use WWW::CurlOO::Easy qw(/^CURLE_/);
use base qw(WWW::CurlOO::Easy);
BEGIN {
if ( WWW::CurlOO::LIBCURL_VERSION_NUM() < 0x071202 ) {
my $ver = WWW::CurlOO::LIBCURL_VERSION();
die "curl $ver does not support send() and recv()";
}
# alternatively you can write:
if ( not WWW::CurlOO::Easy->can( "send" )
or not WWW::CurlOO::Easy->can( "recv" ) ) {
die "WWW::CurlOO is missing send() and recv()\n"
}
}
use constant {
B_URI => 0,
B_SOCKET => 1,
B_VEC => 2,
B_READBUF => 3,
};
# new( URL ) -- get new object
sub new
{
my $class = shift;
my $uri = shift;
# use an array as our object base
my $base = [ $uri, undef, undef, '' ];
my $self = $class->SUPER::new( $base );
$self->setopt( WWW::CurlOO::Easy::CURLOPT_URL, $uri );
$self->setopt( WWW::CurlOO::Easy::CURLOPT_CONNECT_ONLY, 1 );
# will die if fails
$self->perform();
$self->[ B_SOCKET ] = $self->getinfo( WWW::CurlOO::Easy::CURLINFO_LASTSOCKET );
# prepare select vector
my $vec = '';
vec( $vec, $self->[ B_SOCKET ], 1 ) = 1;
$self->[ B_VEC ] = $vec;
return $self;
}
# send( DATA ) -- send some data, wait for socket availability if it cannot
# be sent all at once
sub send($$)
{
my $self = shift;
my $data = shift;
while ( length $data ) {
# copy, because select overwrites those values
my $w = $self->[ B_VEC ];
# wait for write
select undef, $w, undef, 0;
# make sure some write bit is set
next unless vec( $w, $self->[ B_SOCKET ], 1 );
# actually send the data
my $sent = $self->SUPER::send( $data );
# remove from buffer what we sent
substr $data, 0, $sent, '';
};
}
# read( SIZE ) -- read SIZE bytes, wait for more data if there wasn't enough
sub read($$)
{
my $self = shift;
my $size = shift;
return '' unless $size > 0;
while ( length $self->[ B_READBUF ] < $size ) {
my $r = $self->[ B_VEC ];
# wait for data
select $r, undef, undef, 0;
# make sure some read bit is set
redo unless vec( $r, $self->[ B_SOCKET ], 1 );
eval {
my $l = $self->SUPER::recv( $self->[ B_READBUF ],
$size - length $self->[ B_READBUF ] );
};
if ( $@ ) {
if ( $@ == CURLE_UNSUPPORTED_PROTOCOL ) {
my $uri = $self->[ B_URI ];
warn "Connection to $uri closed: $@\n";
last;
} elsif ( $@ == CURLE_AGAIN ) {
warn "nothing to read, this should not happen";
} else {
die $@;
}
}
}
return substr $self->[ B_READBUF ], 0, $size, '';
}
# readline() -- read until $/
sub readline($)
{
my $self = shift;
# we allow changing $/, but we don't support $/ = undef.
local $/;
$/ = "\n" unless defined $/;
my $idx;
until ( ( $idx = index $self->[ B_READBUF ], $/ ) >= 0 ) {
my $r = $self->[ B_VEC ];
# wait for data
select $r, undef, undef, 0;
# make sure some read bit is set
next unless vec( $r, $self->[ B_SOCKET ], 1 );
# read 256 bytes, should be enough in most cases
eval {
$self->SUPER::recv( $self->[ B_READBUF ], 256 );
};
if ( $@ ) {
if ( $@ == CURLE_UNSUPPORTED_PROTOCOL ) {
my $uri = $self->[ B_URI ];
warn "Connection to $uri closed: $@\n";
last;
} elsif ( $@ == CURLE_AGAIN ) {
warn "nothing to read, this should not happen";
} else {
die $@;
}
}
}
return substr $self->[ B_READBUF ], 0, ($idx + length $/), '';
}
1;
TEST APPLICATION
Sample application using this module could look like this:
#!perl
use strict;
use warnings;
use Curl::Transport;
my $host = shift @ARGV || "example.com";
my $t = Curl::Transport->new( "http://$host" );
$t->send( "GET / HTTP/1.0\r\n" );
$t->send( "User-Agent: Curl::Transport test\r\n" );
$t->send( "Accept: */*\r\n" );
$t->send( "Host: $host\r\n" );
$t->send( "Connection: Close\r\n" );
$t->send( "\r\n" );
my $length;
{
local $/ = "\r\n";
local $_;
do {
$_ = $t->readline();
$length = 0 | $1 if /Content-Length:\s*(\d+)/;
chomp;
print "HEADER: $_\n";
} while ( length $_ );
}
if ( defined $length ) {
print "Reading $length bytes of data:\n";
print $t->read( $length );
print "\nTrying to read one more byte, should fail:\n";
print $t->read( 1 );
print "\n";
} else {
print "Don't know how much to read\n";
while ( $_ = $t->readline() ) {
print;
}
}
printf "Last error: %s\n", $t->error();
Multi::Simple
Extracted from examples/02-multi-simple.pl
This module shows how to use WWW::CurlOO::Multi interface correctly in its simpliest form. Uses perl builtin select(). A more advanced code would use callbacks and some event library instead.
Motivation
Writing a proper multi wrapper code requires a rather good understainding of libcurl multi interface. This code provides a recipie for those who just need something that "simply works".
MODULE CODE
package Multi::Simple;
use strict;
use warnings;
use WWW::CurlOO::Multi;
use base qw(WWW::CurlOO::Multi);
# make new object, preset the data
sub new
{
my $class = shift;
my $active = 0;
return $class->SUPER::new( \$active );
}
# add one handle and count it
sub add_handle($$)
{
my $self = shift;
my $easy = shift;
$$self++;
$self->SUPER::add_handle( $easy );
}
# perform until some handle finishes, does all the magic needed to make it
# efficient (check as soon as there is some data) without overusing the cpu.
sub get_one($)
{
my $self = shift;
if ( my @result = $self->info_read() ) {
$self->remove_handle( $result[ 1 ] );
return @result;
}
while ( $$self ) {
my $t = $self->timeout;
if ( $t != 0 ) {
$t = 10000 if $t < 0;
my ( $r, $w, $e ) = $self->fdset;
select $r, $w, $e, $t / 1000;
}
my $ret = $self->perform();
if ( $$self != $ret ) {
$$self = $ret;
if ( my @result = $self->info_read() ) {
$self->remove_handle( $result[ 1 ] );
return @result;
}
}
};
return ();
}
1;
TEST APPLICATION
Sample application using this module looks like this:
#!perl
use strict;
use warnings;
use Multi::Simple;
use WWW::CurlOO::Share qw(:constants);
sub easy
{
my $uri = shift;
my $share = shift;
require WWW::CurlOO::Easy;
my $easy = WWW::CurlOO::Easy->new( { uri => $uri, body => '' } );
$easy->setopt( WWW::CurlOO::Easy::CURLOPT_VERBOSE(), 1 );
$easy->setopt( WWW::CurlOO::Easy::CURLOPT_URL(), $uri );
$easy->setopt( WWW::CurlOO::Easy::CURLOPT_WRITEHEADER(), \$easy->{headers} );
$easy->setopt( WWW::CurlOO::Easy::CURLOPT_FILE(), \$easy->{body} );
$easy->setopt( WWW::CurlOO::Easy::CURLOPT_SHARE(), $share );
return $easy;
}
my $multi = Multi::Simple->new();
my @uri = (
"http://www.google.com/search?q=perl",
"http://www.google.com/search?q=curl",
"http://www.google.com/search?q=perl+curl",
);
{
# share cookies between all handles
my $share = WWW::CurlOO::Share->new();
$share->setopt( CURLSHOPT_SHARE, CURL_LOCK_DATA_COOKIE );
$multi->add_handle( easy( shift ( @uri ), $share ) );
}
my $ret = 0;
while ( my ( $msg, $easy, $result ) = $multi->get_one() ) {
print "\nFinished downloading $easy->{uri}: $result:\n";
printf "Body is %d bytes long\n", length $easy->{body};
print "=" x 80 . "\n";
$ret = 1 if $result;
$multi->add_handle( easy( shift ( @uri ), $easy->share ) ) if @uri;
}
exit $ret;
Multi::Event
Extracted from examples/03-multi-event.pl
This module shows how to use WWW::CurlOO::Multi interface with an event library, AnyEvent in this case.
Motivation
This is the most efficient method for using WWW::CurlOO::Multi interface, but it requires a really good understanding of it. This code tries to show the quirks found when using event-based programming.
MODULE CODE
package Multi::Event;
use strict;
use warnings;
use AnyEvent;
use WWW::CurlOO::Multi qw(/^CURL_POLL_/ /^CURL_CSELECT_/);
use base qw(WWW::CurlOO::Multi);
BEGIN {
if ( not WWW::CurlOO::Multi->can( 'CURLMOPT_TIMERFUNCTION' ) ) {
die "WWW::CurlOO::Multi is missing timer callback,\n" .
"rebuild WWW::CurlOO with libcurl 7.16.0 or newer\n";
}
}
sub new
{
my $class = shift;
# no base object this time
# we'll use the default hash
my $multi = $class->SUPER::new();
$multi->setopt( WWW::CurlOO::Multi::CURLMOPT_SOCKETFUNCTION,
\&_cb_socket );
$multi->setopt( WWW::CurlOO::Multi::CURLMOPT_TIMERFUNCTION,
\&_cb_timer );
$multi->{active} = -1;
return $multi;
}
# socket callback:
# will be called by curl any time events on some socket must be updated
sub _cb_socket
{
my ( $multi, $easy, $socket, $poll ) = @_;
#warn "on_socket( $socket => $poll )\n";
# Right now $socket belongs to that $easy, but it can be
# shared with another easy handle if server supports persistent
# connections.
# This is why we register socket events inside multi object
# and not $easy.
# deregister old io events
delete $multi->{ "r$socket" };
delete $multi->{ "w$socket" };
# AnyEvent does not support registering a socket for both reading and
# writing. This is rarely used so there is no harm in separating
# the events.
# register read event
if ( $poll == CURL_POLL_IN or $poll == CURL_POLL_INOUT ) {
$multi->{ "r$socket" } = AE::io $socket, 0, sub {
$multi->socket_action( $socket, CURL_CSELECT_IN );
};
}
# register write event
if ( $poll == CURL_POLL_OUT or $poll == CURL_POLL_INOUT ) {
$multi->{ "w$socket" } = AE::io $socket, 1, sub {
$multi->socket_action( $socket, CURL_CSELECT_OUT );
};
}
return 1;
}
# timer callback:
# It triggers timeout update. Timeout value tells us how soon socket_action
# must be called if there were no actions on sockets. This will allow
# curl to trigger timeout events.
sub _cb_timer
{
my ( $multi, $timeout_ms ) = @_;
#warn "on_timer( $timeout_ms )\n";
# deregister old timer
delete $multi->{timer};
my $cb = sub {
$multi->socket_action( WWW::CurlOO::Multi::CURL_SOCKET_TIMEOUT );
};
if ( $timeout_ms < 0 ) {
# Negative timeout means there is no timeout at all. Normally happens
# if there are no handles anymore.
#
# However, curl_multi_timeout(3) says:
#
# Note: if libcurl returns a -1 timeout here, it just means that
# libcurl currently has no stored timeout value. You must not wait
# too long (more than a few seconds perhaps) before you call
# curl_multi_perform() again.
# XXX: this is missing yet
#if ( $multi->handles ) {
$multi->{timer} = AE::timer 10, 10, $cb;
#}
} else {
# This will trigger timeouts if there are any.
$multi->{timer} = AE::timer $timeout_ms / 1000, 0, $cb;
}
return 1;
}
# add one handle and kickstart download
sub add_handle($$)
{
my $multi = shift;
my $easy = shift;
die "easy cannot finish()\n"
unless $easy->can( 'finish' );
# Calling socket_action with default arguments will trigger socket callback
# and register IO events.
#
# It _must_ be called _after_ add_handle(); AE will take care of that.
#
# We are delaying the call because in some cases socket_action may finish
# inmediatelly (i.e. there was some error or we used persistent connections
# and server returned data right away) and it could confuse our
# application -- it would appear to have finished before it started.
AE::timer 0, 0, sub {
$multi->socket_action();
};
$multi->SUPER::add_handle( $easy );
}
# perform and call any callbacks that have finished
sub socket_action
{
my $multi = shift;
my $active = $multi->SUPER::socket_action( @_ );
return if $multi->{active} == $active;
$multi->{active} = $active;
while ( my ( $msg, $easy, $result ) = $multi->info_read() ) {
if ( $msg == WWW::CurlOO::Multi::CURLMSG_DONE ) {
$multi->remove_handle( $easy );
$easy->finish( $result );
} else {
die "I don't know what to do with message $msg.\n";
}
}
}
1;
TEST Easy package
Multi::Event requires Easy object to provide finish() method.
package Easy::Event;
use strict;
use warnings;
use WWW::CurlOO::Easy qw(/^CURLOPT_/);
use base qw(WWW::CurlOO::Easy);
sub new
{
my $class = shift;
my $uri = shift;
my $cb = shift;
my $easy = $class->SUPER::new( { uri => $uri, body => '', cb => $cb } );
$easy->setopt( CURLOPT_URL, $uri );
$easy->setopt( CURLOPT_WRITEHEADER, \$easy->{headers} );
$easy->setopt( CURLOPT_FILE, \$easy->{body} );
return $easy;
}
sub finish
{
my ( $easy, $result ) = @_;
printf "\nFinished downloading %s: %s: %d bytes\n",
$easy->{uri}, $result, length $easy->{body};
$easy->{cb}->( $easy->{body} );
}
1;
TEST APPLICATION
#!perl
use strict;
use warnings;
use Easy::Event;
use Multi::Event;
use AnyEvent;
my $multi = Multi::Event->new();
my $cv = AE::cv;
my @uris = (
"http://www.google.com/search?q=perl",
"http://www.google.com/search?q=curl",
"http://www.google.com/search?q=perl+curl",
);
my $i = scalar @uris;
sub done
{
my $body = shift;
# process...
unless ( --$i ) {
$cv->send;
}
}
my $timer;
$timer = AE::timer 0, 0.1, sub {
my $uri = shift @uris;
$multi->add_handle( Easy::Event->new( $uri, \&done ) );
unless ( @uris ) {
undef $timer;
}
};
$cv->recv;
exit 0;