The London Perl and Raku Workshop takes place on 26th Oct 2024. If your company depends on Perl, please consider sponsoring and/or attending.

NAME

PDL::CCS::Nd - N-dimensional sparse pseudo-PDLs

SYNOPSIS

use PDL;
use PDL::CCS::Nd;

##---------------------------------------------------------------------
## Example data

$missing = 0;                                   ##-- missing values
$dense   = random(@dims);                       ##-- densely encoded pdl
$dense->where(random(@dims)<=0.95) .= $missing; ##   ... made sparse

$whichND = $dense->whichND;                     ##-- which values are present?
$nzvals  = $dense->indexND($whichND);           ##-- ... and what are they?


##---------------------------------------------------------------------
## Constructors etc.

$ccs = PDL::CCS:Nd->newFromDense($dense,%args);           ##-- construct from dense matrix
$ccs = PDL::CCS:Nd->newFromWhich($whichND,$nzvals,%args); ##-- construct from index+value pairs

$ccs = $dense->toccs();                ##-- ensure PDL::CCS::Nd-hood
$ccs = $ccs->toccs();                  ##-- ... analogous to PDL::topdl()
$ccs = $dense->toccs($missing,$flags); ##-- ... with optional arguments

$ccs2 = $ccs->copy();                  ##-- copy constructor
$ccs2 = $ccs->copyShallow();           ##-- shallow copy, mainly for internal use
$ccs2 = $ccs->shadow(%args);           ##-- flexible copy method, for internal use

##---------------------------------------------------------------------
## Maintenance & Decoding

$ccs = $ccs->recode();                 ##-- remove missing values from stored VALS
$ccs = $ccs->sortwhich();              ##-- internal use only

$dense2 = $ccs->decode();              ##-- extract to a (new) dense matrix

$dense2 = $ccs->todense();             ##-- ensure dense storage
$dense2 = $dense2->todense();          ##-- ... analogous to PDL::topdl()

##---------------------------------------------------------------------
## PDL API: Basic Properties

##---------------------------------------
## Type conversion & Checking
$ccs2 = $ccs->convert($type);
$ccs2 = $ccs->byte;
$ccs2 = $ccs->short;
$ccs2 = $ccs->ushort;
$ccs2 = $ccs->long;
$ccs2 = $ccs->longlong;
$ccs2 = $ccs->float;
$ccs2 = $ccs->double;

##---------------------------------------
## Dimensions
@dims  = $ccs->dims();
$ndims = $ccs->ndims();
$dim   = $ccs->dim($dimi);
$nelem = $ccs->nelem;
$bool  = $ccs->isnull;
$bool  = $ccs->isempty;

##---------------------------------------
## Inplace & Dataflow
$ccs  = $ccs->inplace();
$bool = $ccs->is_inplace;
$bool = $ccs->set_inplace($bool);
$ccs  = $ccs->sever;

##---------------------------------------
## Bad Value Handling

$bool = $ccs->bad_is_missing();          ##-- treat BAD values as missing?
$bool = $ccs->bad_is_missing($bool);
$ccs  = $ccs->badmissing();              ##-- ... a la inplace()

$bool = $ccs->nan_is_missing();          ##-- treat NaN values as missing?
$bool = $ccs->nan_is_missing($bool);
$ccs  = $ccs->nanmissing();              ##-- ... a la inplace()

$ccs2 = $ccs->setnantobad();
$ccs2 = $ccs->setbadtonan();
$ccs2 = $ccs->setbadtoval($val);
$ccs2 = $ccs->setvaltobad($val);

##---------------------------------------------------------------------
## PDL API: Dimension Shuffling

$ccs2 = $ccs->dummy($vdimi,$size);
$ccs2 = $ccs->reorder(@vdims);
$ccs2 = $ccs->xchg($vdim1,$vdim2);
$ccs2 = $ccs->mv($vdimFrom,$vdimTo);
$ccs2 = $ccs->transpose();

##---------------------------------------------------------------------
## PDL API: Indexing

$nzi   = $ccs->indexNDi($ndi);              ##-- guts for indexing methods
$ndi   = $ccs->n2oned($ndi);                ##-- returns 1d pseudo-index for $ccs

$ivals = $ccs->indexND($ndi);
$ivals = $ccs->index2d($xi,$yi);
$ivals = $ccs->index($flati);               ##-- buggy: no pseudo-threading!
$ccs2  = $ccs->dice_axis($vaxis,$vaxis_ix);

$nzi   = $ccs->xindex1d($xi);               ##-- nz-indices along 0th dimension
$nzi   = $ccs->pxindex1d($dimi,$xi);        ##-- ... or any dimension, using ptr()
$nzi   = $ccs->xindex2d($xi,$yi);           ##-- ... or for Cartesian product on 2d matrix

$ccs2  = $ccs->xsubset1d($xi);              ##-- subset along 0th dimension
$ccs2  = $ccs->pxsubset1d($dimi,$xi);       ##-- ... or any dimension, using ptr()
$ccs2  = $ccs->xsubset2d($xi,$yi);          ##-- ... or for Cartesian product on 2d matrix

$whichND = $ccs->whichND();
$vals    = $ccs->whichVals();               ##-- like $ccs->indexND($ccs->whichND), but faster
$which   = $ccs->which()

$value = $ccs->at(@index);
$ccs   = $ccs->set(@index,$value);

##---------------------------------------------------------------------
## PDL API: Ufuncs

$ccs2 = $ccs->prodover;
$ccs2 = $ccs->dprodover;
$ccs2 = $ccs->sumover;
$ccs2 = $ccs->dsumover;
$ccs2 = $ccs->andover;
$ccs2 = $ccs->orover;
$ccs2 = $ccs->bandover;
$ccs2 = $ccs->borover;
$ccs2 = $ccs->maximum;
$ccs2 = $ccs->minimum;
$ccs2 = $ccs->maximum_ind; ##-- -1 indicates "missing" value is maximal
$ccs2 = $ccs->minimum_ind; ##-- -1 indicates "missing" value is minimal
$ccs2 = $ccs->nbadover;
$ccs2 = $ccs->ngoodover;
$ccs2 = $ccs->nnz;

$sclr = $ccs->prod;
$sclr = $ccs->dprod;
$sclr = $ccs->sum;
$sclr = $ccs->dsum;
$sclr = $ccs->nbad;
$sclr = $ccs->ngood;
$sclr = $ccs->min;
$sclr = $ccs->max;
$bool = $ccs->any;
$bool = $ccs->all;

##---------------------------------------------------------------------
## PDL API: Unary Operations         (Overloaded)

$ccs2 = $ccs->bitnot;                $ccs2 = ~$ccs;
$ccs2 = $ccs->not;                   $ccs2 = !$ccs;
$ccs2 = $ccs->sqrt;
$ccs2 = $ccs->abs;
$ccs2 = $ccs->sin;
$ccs2 = $ccs->cos;
$ccs2 = $ccs->exp;
$ccs2 = $ccs->log;
$ccs2 = $ccs->log10;

##---------------------------------------------------------------------
## PDL API: Binary Operations (missing is annihilator)
##  + $b may be a perl scalar, a dense PDL, or a PDL::CCS::Nd object
##  + $c is always returned as a PDL::CCS::Nd ojbect

##---------------------------------------
## Arithmetic
$c = $ccs->plus($b);         $c = $ccs1 +  $b;
$c = $ccs->minus($b);        $c = $ccs1 -  $b;
$c = $ccs->mult($b);         $c = $ccs1 *  $b;
$c = $ccs->divide($b);       $c = $ccs1 /  $b;
$c = $ccs->modulo($b);       $c = $ccs1 %  $b;
$c = $ccs->power($b);        $c = $ccs1 ** $b;

##---------------------------------------
## Comparisons
$c = $ccs->gt($b);           $c = ($ccs  >  $b);
$c = $ccs->ge($b);           $c = ($ccs  >= $b);
$c = $ccs->lt($b);           $c = ($ccs  <  $b);
$c = $ccs->le($b);           $c = ($ccs  <= $b);
$c = $ccs->eq($b);           $c = ($ccs  == $b);
$c = $ccs->ne($b);           $c = ($ccs  != $b);
$c = $ccs->spaceship($b);    $c = ($ccs <=> $b);

##---------------------------------------
## Bitwise Operations
$c = $ccs->and2($b);          $c = ($ccs &  $b);
$c = $ccs->or2($b);           $c = ($ccs |  $b);
$c = $ccs->xor($b);           $c = ($ccs ^  $b);
$c = $ccs->shiftleft($b);     $c = ($ccs << $b);
$c = $ccs->shiftright($b);    $c = ($ccs >> $b);

##---------------------------------------
## Matrix Operations
$c = $ccs->inner($b);
$c = $ccs->matmult($b);       $c = $ccs x $b;
$c_dense = $ccs->matmult2d_sdd($b_dense, $zc);
$c_dense = $ccs->matmult2d_zdd($b_dense);

$vnorm = $ccs->vnorm($pdimi);
$vcos  = $ccs->vcos_zdd($b_dense);
$vcos  = $ccs->vcos_pzd($b_ccs);

##---------------------------------------
## Other Operations
$ccs->rassgn($b);             $ccs .= $b;
$str = $ccs->string();        $str  = "$ccs";

##---------------------------------------------------------------------
## Indexing Utilities


##---------------------------------------------------------------------
## Low-Level Object Access

$num_v_per_p = $ccs->_ccs_nvperp;                                  ##-- num virtual / num physical
$pdims    = $ccs->pdims;            $vdims    = $ccs->vdims;       ##-- physical|virtual dim pdl
$nelem    = $ccs->nelem_p;          $nelem    = $ccs->nelem_v;     ##-- physical|virtual nelem
$nstored  = $ccs->nstored_p;        $nstored  = $ccs->nstored_v;   ##-- physical|virtual Nnz+1
$nmissing = $ccs->nmissing_p;       $nmissing = $ccs->nmissing_v;  ##-- physical|virtual nelem-Nnz

$ccs = $ccs->make_physically_indexed();        ##-- ensure all dimensions are physically indexed

$bool = $ccs->allmissing();                    ##-- are all values missing?

$missing_val = $ccs->missing;                  ##-- get missing value
$missing_val = $ccs->missing($missing_val);    ##-- set missing value
$ccs         = $ccs->_missing($missing_val);   ##-- ... returning the object

$whichND_phys = $ccs->_whichND();              ##-- get/set physical indices
$whichND_phys = $ccs->_whichND($whichND_phys);

$nzvals_phys  = $ccs->_nzvals();               ##-- get/set physically indexed values
$nzvals_phys  = $ccs->_nzvals($vals_phys);

$vals_phys    = $ccs->_vals();                 ##-- get/set physically indexed values
$vals_phys    = $ccs->_vals($vals_phys);

$bool         = $ccs->hasptr($pdimi);          ##-- check for cached Harwell-Boeing pointer
($ptr,$ptrix) = $ccs->ptr($pdimi);             ##-- ... get one, caching for later use
($ptr,$ptrix) = $ccs->getptr($pdimi);          ##-- ... compute one, regardless of cache
($ptr,$ptrix) = $ccs->setptr($pdimi,$p,$pix);  ##-- ... set a cached pointer
$ccs->clearptr($pdimi);                        ##-- ... clear a cached pointer
$ccs->clearptrs();                             ##-- ... clear all cached pointers

$flags = $ccs->flags();                        ##-- get/set object-local flags
$flags = $ccs->flags($flags);

$density = $ccs->density;                      ##-- get object density
$crate   = $ccs->compressionRate;              ##-- get compression rate

DESCRIPTION

PDL::CCS::Nd provides an object-oriented implementation of sparse N-dimensional vectors & matrices using a set of low-level PDLs to encode non-missing values. Currently, only a portion of the PDL API is implemented.

GLOBALS

The following package-global variables are defined:

Block Size Constants

$BINOP_BLOCKSIZE_MIN = 1;
$BINOP_BLOCKSIZE_MAX = 0;

Minimum (maximum) block size for block-wise incremental computation of binary operations. Zero or undef indicates no minimum (maximum).

Object Structure

PDL::CCS::Nd object are implemented as perl ARRAY-references. For more intuitive access to object components, the following package-global variables can be used as array indices to access internal object structure:

$PDIMS

Indexes a pdl(long,$NPdims) of physically indexed dimension sizes:

$ccs->[$PDIMS]->at($pdim_i) == $dimSize_i
$VDIMS

Indexes a pdl(long,$NVdims) of "virtual" dimension sizes:

$ccs->[$VDIMS]->at($vdim_i) == / -$vdimSize_i    if $vdim_i is a dummy dimension
                               \  $pdim_i        otherwise

The $VDIMS piddle is used for dimension-shuffling transformations such as xchg() and reorder(), as well as for dummy().

$WHICH

Indexes a pdl(long,$NPdims,$Nnz) of the "physical indices" of all non-missing values in the non-dummy dimensions of the corresponding dense matrix. Vectors in $WHICH are guaranteed to be sorted in lexicographic order. If your $missing value is zero, and if your qsortvec() function works, it should be the case that:

all( $ccs->[$WHICH] == $dense->whichND->qsortvec )

A "physically indexed dimension" is just a dimension corresponding tp a single column of the $WHICH pdl, whereas a dummy dimension does not correspond to any physically indexed dimension.

$VALS

Indexes a vector pdl($valType, $Nnz+1) of all values in the sparse matrix, where $Nnz is the number of non-missing values in the sparse matrix. Non-final elements of the $VALS piddle are interpreted as the values of the corresponding indices in the $WHICH piddle:

all( $ccs->[$VALS]->slice("0:-2") == $dense->indexND($ccs->[$WHICH]) )

The final element of the $VALS piddle is referred to as "$missing", and represents the value of all elements of the dense physical matrix whose indices are not explicitly listed in the $WHICH piddle:

all( $ccs->[$VALS]->slice("-1") == $dense->flat->index(which(!$dense)) )
$PTRS

Indexes an array of arrays containing Harwell-Boeing "pointer" piddle pairs for the corresponding physically indexed dimension. For a physically indexed dimension $d of size $N, $ccs->[$PTRS][$d] (if it exists) is a pair [$ptr,$ptrix] as returned by PDL::CCS::Utils::ccs_encode_pointers($WHICH,$N), which are such that:

$ptr

$ptr is a pdl(long,$N+1) containing the offsets in $ptrix corresponding to the first non-missing value in the dimension $d. For all $i, 0 <= $i < $N, $ptr($i) contains the index of the first non-missing value (if any) from column $i of $dense(...,N,...) encoded in the $WHICH piddle. $ptr($N+1) contains the number of physically indexed cells in the $WHICH piddle.

$ptrix

Is an index piddle into dim(1) of $WHICH rsp. dim(0) of $VALS whose key positions correspond to the offsets listed in $ptr. The point here is that:

$WHICH->dice_axis(1,$ptrix)

is guaranteed to be primarily sorted along the pointer dimension $d, and stably sorted along all other dimensions, e.g. should be identical to:

$WHICH->mv($d,0)->qsortvec->mv(0,$d)
$FLAGS

Indexes a perl scalar containing some object-local flags. See "Object Flags" for details.

$USER

Indexes the first unused position in the object array. If you derive a class from PDL::CCS::Nd, you should use this position to place any new object-local data.

Object Flags

The following object-local constants are defined as bitmask flags:

$CCSND_BAD_IS_MISSING

Bitmask of the "bad-is-missing" flag. See the bad_is_missing() method.

$CCSND_NAN_IS_MISSING

Bitmask of the "NaN-is-missing" flag. See the nan_is_missing() method.

$CCSND_INPLACE

Bitmask of the "inplace" flag. See PDL::Core for details.

$CCSND_FLAGS_DEFAULT

Default flags for new objects.

METHODS

Constructors, etc.

$class_or_obj->newFromDense($dense,$missing,$flags)
Signature ($class_or_obj; dense(N1,...,NNdims); missing(); int flags)

Class method. Create and return a new PDL::CCS::Nd object from a dense N-dimensional PDL $dense. If specified, $missing is used as the value for "missing" elements, and $flags are used to initialize the object-local flags.

$missing defaults to BAD if the bad flag of $dense is set, otherwise $missing defaults to zero.

$ccs->fromDense($dense,$missing,$flags)
Signature ($ccs; dense(N1,...,NNdims); missing(); int flags)

Object method. Populate a sparse matrix object from a dense piddle $dense. See newFromDense().

$class_or_obj->newFromWhich($whichND,$nzvals,%options)
Signature ($class_or_obj; int whichND(Ndims,Nnz); nzvals(Nnz+1); %options)

Class method. Create and return a new PDL::CCS::Nd object from a set of indices $whichND of non-missing elements in a (hypothetical) dense piddle and a vector $nzvals of the corresponding values. Known %options:

sorted  => $bool,    ##-- if true, $whichND is assumed to be pre-sorted
steal   => $bool,    ##-- if true, $whichND and $nzvals are used literally (formerly implied 'sorted')
                     ##    + in this case, $nzvals should really be: $nzvals->append($missing)
pdims   => $pdims,   ##-- physical dimension list; default guessed from $whichND (alias: 'dims')
vdims   => $vdims,   ##-- virtual dims (default: sequence($nPhysDims)); alias: 'xdims'
missing => $missing, ##-- default: BAD if $nzvals->badflag, 0 otherwise
flags   => $flags    ##-- flags
$ccs->fromWhich($whichND,$nzvals,%options)

Object method. Guts for newFromWhich().

$a->toccs($missing,$flags)

Wrapper for newFromDense(). Return a PDL::CCS::Nd object for any piddle or perl scalar $a. If $a is already a PDL::CCS::Nd object, just returns $a. This method gets exported into the PDL namespace for ease of use.

$ccs = $ccs->copy()

Full copy constructor.

$ccs2 = $ccs->copyShallow()

Shallow copy constructor, used e.g. by dimension-shuffling transformations. Copied components:

$PDIMS, @$PTRS, @{$PTRS->[*]}, $FLAGS

Referenced components:

$VDIMS, $WHICH, $VALS,  $PTRS->[*][*]
$ccs2 = $ccs1->shadow(%args)

Flexible constructor for computed PDL::CCS::Nd objects. Known %args:

to    => $ccs2,    ##-- default: new
pdims => $pdims2,  ##-- default: $pdims1->pdl  (alias: 'dims')
vdims => $vdims2,  ##-- default: $vdims1->pdl  (alias: 'xdims')
ptrs  => \@ptrs2,  ##-- default: []
which => $which2,  ##-- default: undef
vals  => $vals2,   ##-- default: undef
flags => $flags,   ##-- default: $flags1

Maintenance & Decoding

$ccs = $ccs->recode()

Recodes the PDL::CCS::Nd object, removing any missing values from its $VALS piddle.

$ccs = $ccs->sortwhich()

Lexicographically sorts $ccs->[$WHICH], altering $VALS accordingly. Clears $PTRS.

$dense = $ccs->decode()
$dense = $ccs->decode($dense)

Decode a PDL::CCS::Nd object to a dense piddle. Dummy dimensions in $ccs should be created as dummy dimensions in $dense.

$dense = $a->todense()

Ensures that $a is not a PDL::CCS::Nd by wrapping decode(). For PDLs or perl scalars, just returns $a.

PDL API: Basic Properties

The following basic PDL API methods are implemented and/or wrapped for PDL::CCS::Nd objects:

Type Checking & Conversion

type, convert, byte, short, ushort, long, double

Type-checking and conversion routines are passed on to the $VALS sub-piddle.

Dimension Access

dims, dim, getdim, ndims, getndims, nelem, isnull, isempty

Note that nelem() returns the number of hypothetically addressable cells -- the number of cells in the corresponding dense matrix, rather than the number of non-missing elements actually stored.

Inplace Operations

set_inplace($bool), is_inplace(), inplace()

Dataflow

sever

Bad Value Handling

setnantobad, setbadtonan, setbadtoval, setvaltobad

See also the bad_is_missing() and nan_is_missing() methods, below.

PDL API: Dimension Shuffling

The following dimension-shuffling methods are supported, and should be compatible to their PDL counterparts:

dummy($vdimi)
dummy($vdimi, $size)

Insert a "virtual" dummy dimension of size $size at dimension index $vdimi.

reorder(@vdim_list)

Reorder dimensions according to @vdim_list.

xchg($vdim1,$vdim2)

Exchange two dimensions.

mv($vdimFrom, $vdimTo)

Move a dimension to another position, shoving remaining dimensions out of the way to make room.

transpose()

Always copies, unlike xchg(). Also unlike xchg(), works for 1d row-vectors.

PDL API: Indexing

indexNDi($ndi)
Signature: ($ccs; int ndi(NVdims,Nind); int [o]nzi(Nind))

Guts for indexing methods. Given an N-dimensional index piddle $ndi, return a 1d index vector into $VALS for the corresponding values. Missing values are returned in $nzi as $Nnz == $ccs->_nnz_p;

Uses PDL::VectorValues::vsearchvec() internally, so expect O(Ndims * log(Nnz)) complexity. Although the theoretical complexity is tough to beat, this method could be made much faster in the usual (read "sparse") case by an intelligent use of $PTRS if and when available.

indexND($ndi)
index2d($xi,$yi)

Should be mostly compatible to the PDL functions of the same names, but without any boundary handling.

index($flati)

Implicitly flattens the source pdl. This ought to be fixed.

dice_axis($axis_v, $axisi)

Should be compatible with the PDL function of the same name. Returns a new PDL::CCS::Nd object which should participate in dataflow.

xindex1d($xi)

Get non-missing indices for any element of $xi along 0th dimension; $xi must be sorted in ascending order.

pxindex1d($dimi,$xi)

Get non-missing indices for any element of $xi along physically indexed dimension $dimi, using "ptr" in ptr($dimi). $xi must be sorted in ascending order.

xindex2d($xi,$yi)

Get non-missing indices for any element in Cartesian product ($xi x $yi) for 2d sparse matrix. $xi and $yi must be sorted in ascending order.

xsubset1d($xi)

Returns a subset object similar to dice_axis(0,$x), but without renumbering of indices along the diced dimension; $xi must be sorted in ascending order.

pxsubset1d($dimi,$xi)

Returns a subset object similar to dice_axis($dimi,$x), but without renumbering of indices along the diced dimension; $xi must be sorted in ascending order.

xsubset2d($xi,$yi)

Returns a subset object similar to indexND( $xi->slice("*1,")->cat($yi)->clump(2)->xchg(0,1) ), but without renumbering of indices; $xi and $yi must be sorted in ascending order.

n2oned($ndi)

Returns a 1d pseudo-index, used for implementation of which(), etc.

whichND()

Should behave mostly like the PDL function of the same name.

Just returns the literal $WHICH piddle if possible: beware of dataflow! Indices are NOT guaranteed to be returned in any surface-logical order, although physically indexed dimensions should be sorted in physical-lexicographic order.

whichVals()

Returns $VALS indexed to correspond to the indices returned by whichND(). The only reason to use whichND() and whichVals() rather than $WHICH and $VALS would be a need for physical representations of dummy dimension indices: try to avoid it if you can.

which()

As for the builtin PDL function.

at(@index)

Return a perl scalar corresponding to the Nd index @index.

set(@index, $value)

Set a non-missing value at index @index to $value. barf()s if @index points to a missing value.

Ufuncs

The following functions from PDL::Ufunc are implemented, and ought to handle missing values correctly (i.e. as their dense counterparts would):

prodover
prod
dprodover
dprod
sumover
sum
dsumover
dsum
andover
orover
bandover
borover
maximum
maximum_ind ##-- goofy if "missing" value is maximal
max
minimum
minimum_ind ##-- goofy if "missing" value is minimal
min
nbadover
nbad
ngoodover
ngood
nnz
any
all

Some Ufuncs are still unimplemented. see PDL::CCS::Ufunc for details.

Unary Operations

The following unary operations are supported:

FUNCTION   OVERLOADS
bitnot      ~
not         !
sqrt
abs
sin
cos
exp
log
log10

Note that any pointwise unary operation can be performed directly on the $VALS piddle. You can wrap such an operation MY_UNARY_OP on piddles into a PDL::CCS::Nd method using the idiom:

package PDL::CCS::Nd;
*MY_UNARY_OP = _unary_op('MY_UNARY_OP', PDL->can('MY_UNARY_OP'));

Note also that unary operations may change the "missing" value associated with the sparse matrix. This is easily seen to be the Right Way To Do It if you consider unary "not" over a very sparse (say 99% missing) binary-valued matrix: is is much easier and more efficient to alter only the 1% of physically stored (non-missing) values as well as the missing value than to generate a new matrix with 99% non-missing values, assuming $missing==0.

Binary Operations

A number of basic binary operations on PDL::CCS::Nd operations are supported, which will produce correct results only under the assumption that "missing" values $missing are annihilators for the operation in question. For example, if we want to compute:

$c = OP($a,$b)

for a binary operation OP on PDL::CCS::Nd objects $a and $b, the current implementation will produce the correct result for $c only if for all values $av in $a and $bv in $b:

OP($av,$b->missing) == OP($a->missing,$b->missing) , and
OP($a->missing,$bv) == OP($a->missing,$b->missing)

This is true in general for OP==\&mult and $missing==0, but not e.g. for OP==\&plus and $missing==0. It should always hold for $missing==BAD (except in the case of assignment, which is a funny kind of operation anyways).

Currently, the only way to ensure that all values are computed correctly in the general case is for $a and $b to contain exactly the same physically indexed values, which rather defeats the purposes of sparse storage, particularly if implicit pseudo-threading is involved (because then we would likely wind up instantiating -- or at least inspecting -- the entire dense matrix). Future implementations may relax these restrictions somewhat.

The following binary operations are implemented:

Arithmetic Operations
FUNCTION     OVERLOADS
plus          +
minus         -
mult          *
divide        /
modulo        %
power         **
Comparisons
FUNCTION     OVERLOADS
gt            >
ge            >=
lt            <
le            <=
eq            ==
ne            !=
spaceship     <=>
Bitwise Operations
FUNCTION     OVERLOADS
and2          &
or2           |
xor           ^
shiftleft     <<
shiftright    >>
Matrix Operations
FUNCTION     OVERLOADS
inner        (none)
matmult       x
Other Operations
FUNCTION     OVERLOADS
rassgn        .=
string        ""

All supported binary operation functions obey the PDL input calling conventions (i.e. they all accept a third argument $swap), and delegate computation to the underlying PDL functions. Note that the PDL::CCS::Nd methods currently do NOT support a third "output" argument. To wrap a new binary operation MY_BINOP into a PDL::CCS::Nd method, you can use the following idiom:

package PDL::CCS::Nd;
*MY_BINOP = _ccsnd_binary_op_mia('MY_BINOP', PDL->can('MY_BINOP'));

The low-level alignment of physically indexed values for binary operations is performed by the function PDL::CCS::ccs_binop_align_block_mia(). Computation is performed block-wise at the perl level to avoid over- rsp. underflow of the space requirements for the output PDL.

Low-Level Object Access

The following methods provide low-level access to PDL::CCS::Nd object structure:

insertWhich
Signature: ($ccs; int whichND(Ndims,Nnz1); vals(Nnz1))

Set or insert values in $ccs for the indices in $whichND to $vals. $whichND need not be sorted. Implicitly makes $ccs physically indexed. Returns the (destructively altered) $ccs.

appendWhich
Signature: ($ccs; int whichND(Ndims,Nnz1); vals(Nnz1))

Like insertWhich(), but assumes that no values for any of the $whichND indices are already present in $ccs. This is faster (because indexNDi need not be called), but less safe.

is_physically_indexed()

Returns true iff only physical dimensions are present.

to_physically_indexed()

Just returns the calling object if all non-missing elements are already physically indexed. Otherwise, returns a new PDL::CCS::Nd object identical to the caller except that all non-missing elements are physically indexed. This may gobble a large amount of memory if the calling element has large dummy dimensions. Also ensures that physical dimension order is identical to logical dimension order.

make_physically_indexed

Wrapper for to_physically_indexed() which eliminates dummy dimensions destructively in the calling object.

Alias: make_physical().

pdims()

Returns the $PDIMS piddle. See "Object Structure", above.

vdims()

Returns the $VDIMS piddle. See "Object Structure", above.

setdims_p(@dims)

Sets $PDIMS piddle. See "Object Structure", above. Returns the calling object. Alias: setdims().

nelem_p()

Returns the number of physically addressable elements.

nelem_v()

Returns the number of virtually addressable elements. Alias for nelem().

_ccs_nvperp()

Returns number of virtually addressable elements per physically addressable element, which should be a positive integer.

nstored_p()

Returns actual number of physically addressed stored elements (aka $Nnz aka $WHICH->dim(1)).

nstored_v()

Returns actual number of physically+virtually addressed stored elements.

nmissing_p()

Returns number of physically addressable elements minus the number of physically stored elements.

nmissing_v()

Returns number of physically+virtually addressable elements minus the number of physically+virtually stored elements.

allmissing()

Returns true iff no non-missing values are stored.

missing()
missing($missing)

Get/set the value to use for missing elements. Returns the (new) value for $missing.

_whichND()
_whichND($whichND)

Get/set the underlying $WHICH piddle.

_nzvals()
_nzvals($storedvals)

Get/set the slice of the underlying $VALS piddle corresponding for non-missing values only. Alias: whichVals().

_vals()
_vals($storedvals)

Get/set the underlying $VALS piddle.

hasptr($pdimi)

Returns true iff a pointer for physical dim $pdimi is cached.

ptr($pdimi)

Get a pointer pair for a physically indexed dimension $pdimi. Uses cached piddles in $PTRS if present, computes & caches otherwise.

$pdimi defaults to zero. If $pdimi is zero, then it should hold that:

all( $pi2nzi==sequence($ccs->nstored_p) )
getptr($pdimi)

Guts for ptr(). Does not check $PTRS and does not cache anything.

clearptr($pdimi)

Clears any cached Harwell-Boeing pointers for physically indexed dimension $pdimi.

clearptrs()

Clears any cached Harwell-Boeing pointers.

flags()
flags($flags)

Get/set object-local $FLAGS.

bad_is_missing()
bad_is_missing($bool)

Get/set the value of the object-local "bad-is-missing" flag. If this flag is set, BAD values in $VALS are considered "missing", regardless of the current value of $missing.

badmissing()

Sets the "bad-is-missing" flag and returns the calling object.

nan_is_missing()
nan_is_missing($bool)

Get/set the value of the object-local "NaN-is-missing" flag. If this flag is set, NaN (and +inf, -inf) values in $VALS are considered "missing", regardless of the current value of $missing.

nanmissing()

Sets the "nan-is-missing" flag and returns the calling object.

General Information

density()

Returns the number of non-missing values divided by the number of indexable values in the sparse object as a perl scalar.

compressionRate()

Returns the compression rate of the PDL::CCS::Nd object compared to a dense piddle of the physically indexable dimensions. Higher values indicate better compression (e.g. lower density). Negative values indicate that dense storage would be more memory-efficient. Pointers are not included in the computation of the compression rate.

ACKNOWLEDGEMENTS

Perl by Larry Wall.

PDL by Karl Glazebrook, Tuomas J. Lukka, Christian Soeller, and others.

KNOWN BUGS

Many.

AUTHOR

Bryan Jurish <moocow@cpan.org>

Copyright (C) 2007-2024, Bryan Jurish. All rights reserved.

This package is free software, and entirely without warranty. You may redistribute it and/or modify it under the same terms as Perl itself.

SEE ALSO

perl(1), PDL(3perl), PDL::SVDLIBC(3perl), PDL::CCS::Nd(3perl),

SVDLIBC: http://tedlab.mit.edu/~dr/SVDLIBC/

SVDPACKC: http://www.netlib.org/svdpack/