##======================================================================== ## NAME =pod
NAME
GermaNet::Flat - Simple flat interface to GermaNet (and other) thesaurus relations
SYNOPSIS
##========================================================================
## PRELIMINARIES
use GermaNet::Flat;
##========================================================================
## Basics
$gn = GermaNet::Flat->new();
$ver = $gn->dbversion();
$gn = $gn->clear();
##========================================================================
## Relations
##-- Generic relations
\@vals = $gn->relation($rel, $arg);
\&CODE = relationWrapper($relation);
##-- Specific relations
\@lexids = $gn->orth2lex($lemma);
\@lemmas = $gn->lex2orth($lexid);
\@synids = $gn->lex2syn($lexid);
\@lexids = $gn->syn2lex($synid);
\@supids = $gn->hypernyms($synid); # a.k.a. $gn->hyperonyms($synid)
\@subids = $gn->hyponyms($synid);
##-- Convenience wrappers
\@synsets = $gn->get_synsets($lemma);
\@terms = $gn->synset_terms($synset);
##========================================================================
## I/O
##-- generic input (guess input format)
$gn = $CLASS_OR_OBJECT->load($filename_or_xmldirname);
##-- I/O: GermaNet XML directory (input only)
$gn = $gn->loadXmlDir($directory);
$gn = $gn->loadXml(@xml_filenames_or_handles);
##-- I/O: raw text
$gn = $gn->loadText($filename_or_fh);
$bool = $gn->saveText($filename_or_fh);
##-- I/O: Berkeley DB
$gn = $gn->loadDB($dbfile);
$bool = $gn->saveDB($dbfilename);
##-- I/O: CDB
$gn = $gn->loadCDB($dbfile);
$bool = $gn->saveCDB($dbfilename);
##-- I/O: Storable
$gn = $gn->loadBin($filename_or_fh);
$bool = $gn->saveBin($filename_or_fh);
##========================================================================
## Low-Level Utilities
\@array_uniq = GermaNet::Flat::auniq(\@array);
@uniq = GermaNet::Flat::luniq(@list);
$gn = $gn->sanitize();
DESCRIPTION
Basics
new
Creates and returns a new (empty) GermaNet::Flat object. The returned object $gn
is a blessed HASH-ref containing at least a "rel" key, which stores the underlying
relation data as a non-deterministic finite partial function:
$gn->{rel} = { "${relation}:${arg}"=>join(' ',@vals), ... };
clear
Clears all data from the object.
Relations
Generic Relations
relation
\@vals = $gn->relation($rel, $arg);
\@vals = $gn->relation($rel, \@args);
Returns the stored value(s) for relation $rel and argument $arg (first form)
or arguments @args (second form) as an ARRAY-ref.
Returned value(s) are not necessarily unique.
relationWrapper
\&CODE = relationWrapper($relation);
Returns a CODE-ref for accessing the unique stored value(s) for relation $relation;
basically just a wrapper for "relation".
Specific relations
dbversion
$ver = $gn->dbversion();
Returns the current database version, which is internally represented as the first value of the pseudo-relation dbversion
.
orth2lex
\@lexids = $gn->orth2lex($lemma);
Returns lexical ID(s) for the lemma (string) $lemma
.
lex2orth
\@lemmas = $gn->lex2orth($lexid);
Returns orthographic form(s) for the lexical ID $lexid
.
lex2syn
\@synids = $gn->lex2syn($lexid);
Returns synset ID(s) for the lexical ID $lexid
.
syn2lex
\@lexids = $gn->syn2lex($synid);
Returns lexical ID(s) for the synset ID $synid
.
hypernyms
\@supids = $gn->hypernyms($synid);
\@supids = $gn->hyperonyms($synid);
Returns hypernym (a.k.a. hyperonym) synset IDs (superclasses) for the synset $synid.
hyponyms
\@subids = $gn->hyponyms($synid);
Returns hyponym synset IDs (subclasses) for the synset $synid.
Convenience wrappers
get_synsets
\@synsets = $gn->get_synsets($lemma);
Returns all synset-IDs for the lemma $lemma
; wraps "orth2lex" and "lex2syn". Uniqueness is not guaranteed.
synset_terms
\@terms = $gn->synset_terms($synset);
Returns all lemma(ta) for the synset ID $synset
; wraps "syn2lex" and "lex2orth". Uniqueness is not guaranteed.
I/O
Generic input
load
$gn = $CLASS_OR_OBJECT->load($filename_or_xmldirname);
Loads GermaNet relation data from $filename_or_xmldirname, which should be in
one of the following supported GermaNet::Flat database formats:
- GermaNet XML directory:
  If $filename_or_xmldirname is a directory, it is assumed to contain GermaNet-format XML which will be loaded by the "loadXmlDir, loadXml" method.
- Storable file:
  If $filename_or_xmldirname carries the extension .bin or .sto, it will be loaded as a perl Storable HASH-ref using the "loadBin, saveBin" method.
- Berkeley DB:
  If $filename_or_xmldirname carries the extension .db or .bdb, it will be tie()d as a Berkeley DB file using the "loadDB, saveDB" method.
- CDB:
  If $filename_or_xmldirname carries the extension .cdb, it will be tie()d as a CDB file using the "loadCDB, saveCDB" method.
- Raw Text:
  Otherwise, $filename_or_xmldirname is expected to contain raw text relation data to be loaded using the "loadText, saveText" method.
GermaNet XML
loadXmlDir, loadXml
$gn = $CLASS_OR_OBJECT->loadXmlDir($directory);
$gn = $CLASS_OR_OBJECT->loadXml(@xml_filenames_or_handles);
Loads relation data from a directory (first form) or files (second form) assumed to be in GermaNet XML format.
loadBin, saveBin
$gn = $gn->loadBin($filename_or_fh);
$bool = $gn->saveBin($filename_or_fh);
Loads/saves relation data from/to a serialized Storable HASH-ref file or filehandle.
loadDB, saveDB
$gn = $gn->loadDB($dbfile);
$bool = $gn->saveDB($dbfilename);
tie()s relation data to/from the Berkeley-DB file $dbfile.
loadCDB, saveCDB
$gn = $gn->loadCDB($dbfile);
$bool = $gn->saveCDB($dbfilename);
tie()s relation data to/from the CDB file $dbfile. UTF-8 support is wonky with CDB files.
loadText, saveText
$gn = $gn->loadText($filename_or_fh);
$bool = $gn->saveText($filename_or_fh);
Loads/saves relation data from/to a plain text file $filename_or_fh.
Each line of $filename_or_fh corresponds to a single relation entry in %{$gn->{rel}}
of the form $KEY\t$VALUES, where $KEY is the item key of the form ${RELATION}:${ARG1}
and $VALUES is a space-separated list of value(s) associated with $ARG1 by $RELATION.
Low-Level Utilities
auniq
\@array_uniq = GermaNet::Flat::auniq(\@array);
Returns unique values from an ARRAY-ref.
luniq
@uniq = GermaNet::Flat::luniq(@list);
Returns unique values for an array or list.
sanitize
$gn = $gn->sanitize();
Low-level compilation utility for trimming duplicates and extraneous whitespace from relation data values.
AUTHOR
Bryan Jurish <moocow@cpan.org>
COPYRIGHT AND LICENSE
Copyright (C) 2013-2019 by Bryan Jurish
This package is free software; you can redistribute it and/or modify it under the same terms as Perl itself, either Perl version 5.24.1 or, at your option, any later version of Perl 5 you may have available.
SEE ALSO
http://www.sfs.uni-tuebingen.de/GermaNet/, https://code.google.com/p/perlapi4germanet, perl(1), ...