#
# Net::Dict.pm
#
# Copyright (C) 2001 Neil Bowers <neilb@cre.canon.co.uk>
# Copyright (c) 1998 Dmitry Rubinstein <dimrub@wisdom.weizmann.ac.il>.
#
# All rights reserved. This program is free software; you can
# redistribute it and/or modify it under the same terms as Perl
# itself.
#
# $Id: Dict.pm,v 2.4 2001/04/23 19:37:07 neilb Exp $
#
package Net::Dict;
use strict;
use IO::Socket;
use Net::Cmd;
use Carp;
use vars qw(@ISA $VERSION $debug);
$VERSION = sprintf("%d.%02d", q$Revision: 2.4 $ =~ /(\d+)\.(\d+)/);
#-----------------------------------------------------------------------
# Default values for arguments to new(). We also use this to
# determine valid argument names - if it's not a key of this hash,
# then it's not a valid argument.
#-----------------------------------------------------------------------
my %ARG_DEFAULT =
(
Port => 2628,
Timeout => 120,
Debug => 0,
Client => "Net::Dict v$VERSION",
);
@ISA = qw(Net::Cmd IO::Socket::INET);
#=======================================================================
#
# new()
#
# constructor - open connection to host, get a list of databases,
# and send CLIENT identification command.
#
#=======================================================================
sub new
{
@_ > 1 or croak 'usage: Net::Dict->new() takes at least a HOST name';
my $class = shift;
my $host = shift;
int(@_) % 2 == 0 or croak 'Net::Dict->new(): odd number of arguments';
my %inargs = @_;
my $self;
my $argref;
return undef unless defined $host;
#-------------------------------------------------------------------
# Process arguments, setting defaults if needed
#-------------------------------------------------------------------
$argref = {};
foreach my $arg (keys %ARG_DEFAULT)
{
$argref->{$arg} = exists $inargs{$arg}
? $inargs{$arg}
: $ARG_DEFAULT{$arg};
delete $inargs{$arg};
}
if (keys(%inargs) > 0)
{
croak "Net::Dict->new(): unknown argument - ",
join(', ', keys %inargs);
}
#-------------------------------------------------------------------
# Make the connection
#-------------------------------------------------------------------
$self = $class->SUPER::new(PeerAddr => $host,
PeerPort => $argref->{Port},
Proto => 'tcp',
Timeout => $argref->{Timeout}
);
return undef
unless defined $self;
${*$self}{'net_dict_host'} = $host;
$self->autoflush(1);
$self->debug($argref->{Debug});
if ($self->response() != CMD_OK)
{
$self->close();
return undef;
}
# parse the initial 220 response
$self->_parse_banner($self->message);
#-------------------------------------------------------------------
# Send the CLIENT command which identifies the connecting client
#-------------------------------------------------------------------
$self->_CLIENT($argref->{Client});
#-------------------------------------------------------------------
# The default - search ALL dictionaries
#-------------------------------------------------------------------
$self->setDicts('*');
return $self;
}
sub dbs
{
@_ == 1 or croak 'usage: $dict->dbs() - takes no arguments';
my $self = shift;
$self->_get_database_list();
return %{${*$self}{'net_dict_dbs'}};
}
sub setDicts
{
my $self = shift;
@{${*$self}{'net_dict_userdbs'}} = @_;
}
sub serverInfo
{
@_ == 1 or croak 'usage: $dict->serverInfo()';
my $self = shift;
return 0
unless $self->_SHOW_SERVER();
my $info = join('', @{$self->read_until_dot});
$self->getline();
$info;
}
sub dbInfo
{
@_ == 2 or croak 'usage: $dict->dbInfo($dbname) - one argument only';
my $self = shift;
if ($self->_SHOW_INFO(@_))
{
return join('', @{$self->read_until_dot()});
}
else
{
return undef;
}
}
sub dbTitle
{
@_ == 2 or croak 'dbTitle() method expects one argument - DB name';
my $self = shift;
my $dbname = shift;
$self->_get_database_list();
if (exists ${${*$self}{'net_dict_dbs'}}{$dbname})
{
return ${${*$self}{'net_dict_dbs'}}{$dbname};
}
else
{
carp 'dbTitle(): unknown database name' if $self->debug;
return undef;
}
}
sub strategies
{
@_ == 1 or croak 'usage: $dict->strategies()';
my $self = shift;
return 0
unless $self->_SHOW_STRAT();
my(%strats, $name, $desc);
foreach (@{$self->read_until_dot()})
{
($name, $desc) = (split /\s/, $_, 2);
chomp $desc;
$strats{$name} = _unquote($desc);
}
$self->getline();
%strats;
}
sub define
{
@_ >= 2 or croak 'usage: $dict->define($word [, @dbs]) - takes at least one argument';
my $self = shift;
my $word = shift;
my @dbs = (@_ > 0) ? @_ : @{${*$self}{'net_dict_userdbs'}};
croak 'select some dictionaries with setDicts or supply as argument to define'
unless @dbs;
my($db, @defs);
#-------------------------------------------------------------------
# check whether we got an empty word
#-------------------------------------------------------------------
if (!defined($word) || $word eq '')
{
carp "empty word passed to define() method";
return undef;
}
foreach $db (@dbs)
{
next
unless $self->_DEFINE($db, $word);
my ($defNum) = ($self->message =~ /^\d{3} (\d+) /);
foreach (0..$defNum-1)
{
my ($d) = ($self->getline =~ /^\d{3} ".*" (\w+) /);
my ($def) = join '', @{$self->read_until_dot};
push @defs, [$d, $def];
}
$self->getline();
}
\@defs;
}
sub match
{
@_ >= 3 or croak 'usage: $self->match($word, $strat [, @dbs]) - takes at least two arguments';
my $self = shift;
my $word = shift;
my $strat = shift;
my @dbs = (@_ > 0) ? @_ : @{${*$self}{'net_dict_userdbs'}};
croak 'define some dictionaries by setDicts or supply as argument to define'
unless @dbs;
my ($db, @matches);
#-------------------------------------------------------------------
# check whether we got an empty pattern
#-------------------------------------------------------------------
if (!defined($word) || $word eq '')
{
carp "empty pattern passed to match() method";
return undef;
}
foreach $db (@dbs)
{
next unless $self->_MATCH($db, $strat, $word);
my ($db, $w);
foreach (@{$self->read_until_dot}) {
($db, $w) = split /\s/, $_, 2;
chomp $w;
push @matches, [$db, _unquote($w)];
}
$self->getline();
}
\@matches;
}
sub auth
{
@_ == 3 or croak 'usage: $dict->auth() - takes two arguments';
my $self = shift;
my $user = shift;
my $pass_phrase = shift;
my $auth_string;
my $string;
my $ctx;
require Digest::MD5;
$string = $self->msg_id().$pass_phrase;
$auth_string = Digest::MD5::md5_hex($string);
if ($self->_AUTH($user, $auth_string))
{
#---------------------------------------------------------------
# clear the cache of database names
# next time a method needs them, this will cause us to go
# back to the server, and thus pick up any AUTH-restricted DBs
#---------------------------------------------------------------
delete ${*$self}{'net_dict_dbs'};
}
else
{
carp "auth() failed with error code ".$self->code() if $self->debug();
return;
}
}
sub status
{
@_ == 1 or croak 'usage: $dict->status() - takes no arguments';
my $self = shift;
my $message;
$self->_STATUS() || return 0;
chomp($message = $self->message);
$message =~ s/^\d{3} //;
return $message;
}
sub capabilities
{
@_ == 1 or croak 'usage: $dict->capabilities() - takes no arguments';
my $self = shift;
return @{ ${*$self}{'net_dict_capabilities'} };
}
sub has_capability
{
@_ == 2 or croak 'usage: $dict->has_capability() - takes one argument';
my $self = shift;
my $cap = shift;
return grep(lc($cap) eq $_, $self->capabilities());
}
sub msg_id
{
@_ == 1 or croak 'usage: $dict->msg_id() - takes no arguments';
my $self = shift;
return ${*$self}{'net_dict_msgid'};
}
sub _DEFINE { shift->command('DEFINE', map { '"'.$_.'"' } @_)->response() == CMD_INFO }
sub _MATCH { shift->command('MATCH', map { '"'.$_.'"' } @_)->response() == CMD_INFO }
sub _SHOW_DB { shift->command('SHOW DB')->response() == CMD_INFO }
sub _SHOW_STRAT { shift->command('SHOW STRAT')->response() == CMD_INFO }
sub _SHOW_INFO { shift->command('SHOW INFO', @_)->response() == CMD_INFO }
sub _SHOW_SERVER { shift->command('SHOW SERVER')->response() == CMD_INFO }
sub _CLIENT { shift->command('CLIENT', @_)->response() == CMD_OK }
sub _STATUS { shift->command('STATUS')->response() == CMD_OK }
sub _HELP { shift->command('HELP')->response() == CMD_INFO }
sub _QUIT { shift->command('QUIT')->response() == CMD_OK }
sub _OPTION_MIME { shift->command('OPTION MIME')->response() == CMD_OK }
sub _AUTH { shift->command('AUTH', @_)->response() == CMD_OK }
sub _SASLAUTH { shift->command('SASLAUTH', @_)->response() == CMD_OK }
sub _SASLRESP { shift->command('SASLRESP', @_)->response() == CMD_OK }
sub quit
{
my $self = shift;
$self->_QUIT;
$self->close;
}
sub DESTROY
{
my $self = shift;
if (defined fileno($self)) {
$self->quit;
}
}
sub response
{
my $self = shift;
my $str = $self->getline() || return undef;
if ($self->debug)
{
$self->debug_print(0,$str);
}
my($code) = ($str =~ /^(\d+) /);
${*$self}{'net_cmd_resp'} = [ $str ];
${*$self}{'net_cmd_code'} = $code;
substr($code,0,1);
}
#=======================================================================
#
# _unquote
#
# Private function used to remove quotation marks from around
# a string.
#
#=======================================================================
sub _unquote
{
my $string = shift;
if ($string =~ /^"/)
{
$string =~ s/^"//;
$string =~ s/"$//;
}
return $string;
}
#=======================================================================
#
# _parse_banner
#
# Parse the initial response banner the server sends when we connect.
# Hoping for:
# 220 blah blah <auth.mime> <msgid>
# The <auth.mime> string gives a list of supported extensions.
# The last bit is a msg-id, which identifies this connection,
# and is used in authentication, for example.
#
#=======================================================================
sub _parse_banner
{
my $self = shift;
my $banner = shift;
my ($code, $capstring, $msgid);
${*$self}{'net_dict_banner'} = $banner;
${*$self}{'net_dict_capabilities'} = [];
if ($banner =~ /^(\d{3}) (.*) (<[^<>]*>)?\s+(<[^<>]+>)\s*$/)
{
${*$self}{'net_dict_msgid'} = $4;
($capstring = $3) =~ s/[<>]//g;
if (length($capstring) > 0)
{
${*$self}{'net_dict_capabilities'} = [split(/\./, $capstring)];
}
}
else
{
carp "unexpected format for welcome banner on connection:\n",
$banner if $self->debug;
}
}
#=======================================================================
#
# _get_database_list
#
# Get the list of databases on the remote server.
# We cache them in the instance data object, so that dbTitle()
# and databases() don't have to go to the server every time.
#
# We check to see whether we've already got the databases first,
# and do nothing if so. This means that this private method
# can just be invoked in the public methods.
#
#=======================================================================
sub _get_database_list
{
my $self = shift;
return if exists ${*$self}{'net_dict_dbs'};
if ($self->_SHOW_DB)
{
my($dbNum)= ($self->message =~ /^\d{3} (\d+)/);
my($name, $descr);
foreach (0..$dbNum-1)
{
($name, $descr) = (split /\s/, $self->getline, 2);
chomp $descr;
${${*$self}{'net_dict_dbs'}}{$name} = _unquote($descr);
}
# Is there a way to do it right? Reading the dot line and the
# status line afterwards? Maybe I should use read_until_dot?
$self->getline();
$self->getline();
}
}
#-----------------------------------------------------------------------
# Method aliases for backwards compatibility
#-----------------------------------------------------------------------
*strats = \&strategies;
1;
__END__
=head1 NAME
Net::Dict - client API for accessing dictionary servers (RFC 2229)
=head1 SYNOPSIS
use Net::Dict;
$dict = Net::Dict->new('dict.server.host');
$h = $dict->define("word");
foreach $i (@{$h}) {
($db, $def) = @{$i};
. . .
}
=head1 DESCRIPTION
C<Net::Dict> is a perl class for looking up words and their
definitions on network dictionary servers.
C<Net::Dict> provides a simple DICT client API for the network
protocol described in RFC2229. Quoting from that RFC:
=over
=item
The Dictionary Server Protocol (DICT) is a TCP transaction based
query/response protocol that allows a client to access dictionary
definitions from a set of natural language dictionary databases.
=back
An instance of Net::Dict represents a connection to a single
DICT server. For example, to connect to the dictionary
server at C<dict.org>, you would write:
$dict = Net::Dict->new('dict.org');
A DICT server can provide any number of dictionaries,
which are referred to as I<databases>.
Each database has a I<name> and a I<title>.
The name is a short identifier,
typically just one word, used to refer to that database.
The title is a brief one-line description of the database.
For example, at the time of writing, the C<dict.org> server
has 11 databases, including a version of Webster's
dictionary from 1913. The name of the database is I<web1913>,
and the title is I<Webster's Revised Unabridged Dictionary (1913)>.
To look up definitions for a word, you use the C<define> method:
$dref = $dict->define('banana');
This returns a reference to a list; each entry in the list
is a reference to a two item list:
[ $dbname, $definition ]
The first entry is a I<database name> as introduced above.
The second entry is the text of a definition from
the specified dictionary.
=head2 MATCHING WORDS
In addition the looking up word definitions,
you can lookup a list of words which match a given
pattern, using the B<match()> method.
Each DICT server typically supports a number of I<strategies>
which can be used to match words against a pattern.
For example, using B<prefix> strategy with a pattern "anti"
would find all words in databases which start with "anti":
@mref = $dict->match('anti', 'prefix');
foreach my $match (@{ $mref })
{
($db, $word) = @{ $match };
}
Similarly the B<suffix> strategy is used to search for words
which end in a given pattern.
The B<strategies()> method is used to request a list of supported
strategies - see L<"METHODS"> for more details.
=head2 SELECTING DATABASES
By default Net::Dict will look in all databases on the DICT server.
This is specified with a special database name of C<*>.
You can specify the database(s) to search explicitly,
as additional arguments to the C<define> method:
$dref = $dict->define('banana', 'wn', 'web1913');
Rather than specify the databases to use every time,
you can change the default from '*' using the C<setDicts> method:
$dict->setDicts('wn', 'web1913');
Any subsequent calls to C<define> will refer to these databases,
unless over-ridden with additional arguments to C<define>.
You can find out what databases are available on a server
using the C<dbs> method:
%dbhash = $dict->dbs();
Each entry in the returned hash has the name of a database as the key,
and the corresponding title as the value.
There is another special database name - C<!> - which says that
all databases should be searched, but as soon as a definition is
found, no further databases should be searched.
=head1 CONSTRUCTOR
$dict = Net::Dict->new (HOST [,OPTIONS]);
This is the constructor for a new Net::Dict object. C<HOST> is the
name of the remote host on which a Dict server is running.
This is required, and must be an explicit host name.
B<Note:> previous versions let you give an empty string
for the hostname, resulting in selection of default hosts.
This behaviour is no longer supported.
C<OPTIONS> are passed in a hash like fashion, using key and value pairs.
Possible options are:
=over 4
=item B<Port>
The port number to connect to on the remote machine for the
Dict connection (a default port number is 2628, according to RFC2229).
=item B<Client>
The string to send as the CLIENT identifier.
If not set, then a default identifier for Net::Dict is sent.
=item B<Timeout>
Sets the timeout for the connection, in seconds.
Defaults to 120.
=item B<Debug>
The debug level - a non-zero value will resulting in debugging
information being generated, particularly when errors occur.
Can be changed later using the C<debug> method,
which is inherited from Net::Cmd.
More on the debug method can be found in L<Net::Cmd>.
=back
Making everything explicit, here's how you might call
the constructor in your client:
$dict = Net::Dict->new($HOST,
Port => 2628,
Client => "myclient v$VERSION",
Timeout => 120,
Debug => 0);
This will return C<undef> if we failed to make the connection.
It will C<die> if bad arguments are passed: no hostname,
unknown argument, etc.
=head1 METHODS
Unless otherwise stated all methods return either a I<true> or I<false>
value, with I<true> meaning that the operation was a success. When a method
states that it returns a value, failure will be returned as I<undef> or an
empty list.
=head2 define ( $word [, @dbs] )
returns a reference to an array, whose members are lists,
consisting of two elements: the dictionary name and the definition.
If no dictionaries are specified, those set by setDicts() are used.
=head2 match ( $pattern, $strategy [, @dbs] )
Looks for words which match $pattern according to the specified
matching $strategy.
Returns a reference to an array,
each entry of which is a reference to a two-element
array: database name, matching word.
=head2 dbs
Returns a hash with information on the databases available
on the DICT server.
The keys are the short names, or identifiers, of the databases;
the value is title of the database:
%dbhash = $dict->dbs();
print "Available dictionaries:\n";
while (($db, $title) = each %dbhash)
{
print "$db : $title\n";
}
This is the C<SHOW DATABASES> command from RFC 2229.
=head2 dbInfo ( $dbname )
Returns a string, containing description of
the dictionary $dbname.
=head2 setDicts ( @dicts )
Specify the dictionaries that will be
searched during the successive define() or match() calls.
Defaults to '*'.
No existance checks are performed by this interface, so you'd better make
sure the dictionaries you specify are on the server (e.g. by calling
dbs()).
=head2 strategies
returns an array, containing an ID of a matching strategy
as a key and a verbose description as a value.
This method was previously called strats();
that name for the method is also currently supported,
for backwards compatibility.
=head2 auth ( $USER, $PASSPHRASE )
Attempt to authenticate the specified user, using the scheme
described on page 18 of RFC 2229.
The user should be known to the server, and $PASSPHRASE
is a shared secret known only to the server and the user.
For example, if you were using dictd from dict.org,
your configuration file might include the following:
database private {
data "/usr/local/dictd/db/private.dict.dz"
index "/usr/local/dictd/db/private.index"
access { user connor }
}
user connor "there can be only one"
To be able to access this database, you'd write
something like the following:
$dict = Net::Dict->new('dict.foobar.com');
$dict->auth('connor', 'there can be only one');
A subsequent call to the C<databases> method would
reveal the C<private> database now accessible.
Not all servers support the AUTH extension;
you can check this with the has_capability() method,
described below.
=head2 serverInfo
Returns a string, containing the information about the server,
provided by the server:
print "Server Info:\n";
print $dict->serverInfo(), "\n";
This is the C<SHOW SERVER> command from RFC 2229.
=head2 dbTitle ( $DBNAME )
Returns the title string for the specified database.
This is the same string returned by the C<dbs()> method
for all databases.
=head2 capabilities
Returns a list of the capabilities supported by the DICT server,
as described on pages 7 and 8 of RFC 2229.
=head2 has_capability ( $cap_name )
Returns true (non-zero) if the DICT server supports the
specified capability; false (zero) otherwise. Eg
if ($dict->has_capability('auth')) {
$dict->auth('genie', 'open sesame');
}
=head2 status
Send the STATUS command to the DICT server,
which will return some server-specific timing
or debugging information.
This may be useful when debugging or tuning a DICT server,
but probably won't be of interest to most users.
=head1 KNOWN BUGS AND LIMITATIONS
=over 4
=item *
The following DICT commands are not currently supported:
OPTION MIME
=item *
No support for firewalls at the moment.
=item *
Site-wide configuration isn't supported. Previous documentation
suggested that it was.
=item *
Currently no way to specify that results of define and match
should be in HTML. This was also previously a config option
for the constructor, but it didn't do anything.
=back
=head1 REPORTING BUGS
When reporting bugs/problems please include as much information as possible.
It may be difficult for me to reproduce the problem as almost every setup
is different.
A small script which yields the problem will probably be of help. It would
also be useful if this script was run with the extra options C<Debug =E<gt> 1>
passed to the constructor, and the output sent with the bug report. If you
cannot include a small script then please include a Debug trace from a
run of your program which does yield the problem.
=head1 EXAMPLES
The B<examples> directory of the Net-Dict distribution
includes C<simple.pl>, which illustrates basic use of the module.
The distribution also includes two example DICT clients:
B<dict> is a basic command-line client, and B<tkdict>
is a GUI-based client, created using Perl/Tk.
=head1 SEE ALSO
=over 4
=item RFC 2229
The internet document which defines the DICT protocol.
http://www.cis.ohio-state.edu/htbin/rfc/rfc2229.html
=item Net::Cmd
A module which provides methods for a network command class,
such as Net::FTP, Net::SMTP, as well as Net::Dict.
Part of the libnet distribution, available from CPAN.
=item dictd(8)
The reference DICT server, available from B<dict.org>.
=item dict(1)
The sample client, written in C, which comes with dictd.
=item http://www.dict.org/
The home page for the DICT effort; has links to other resources,
including other libraries and clients.
=back
=head1 AUTHOR
Net::Dict was written by
Dmitry Rubinstein E<lt>dimrub@wisdom.weizmann.ac.ilE<gt>,
using Net::FTP and Net::SMTP as a pattern and a model for imitation.
The module is now maintained by
Neil Bowers E<lt>neilb@cre.canon.co.ukE<gt>
=head1 COPYRIGHT
Copyright (C) 2001 Canon Research Centre Europe, Ltd.
Copyright (c) 1998 Dmitry Rubinstein. All rights reserved.
This module is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
=cut