# # Net::Dict.pm # # Copyright (C) 2001 Neil Bowers <neilb@cre.canon.co.uk> # Copyright (c) 1998 Dmitry Rubinstein <dimrub@wisdom.weizmann.ac.il>. # # All rights reserved. This program is free software; you can # redistribute it and/or modify it under the same terms as Perl # itself. # # $Id: Dict.pm,v 2.4 2001/04/23 19:37:07 neilb Exp $ # package Net::Dict; use strict; use IO::Socket; use Net::Cmd; use Carp; use vars qw(@ISA $VERSION $debug); $VERSION = sprintf("%d.%02d", q$Revision: 2.4 $ =~ /(\d+)\.(\d+)/); #----------------------------------------------------------------------- # Default values for arguments to new(). We also use this to # determine valid argument names - if it's not a key of this hash, # then it's not a valid argument. #----------------------------------------------------------------------- my %ARG_DEFAULT = ( Port => 2628, Timeout => 120, Debug => 0, Client => "Net::Dict v$VERSION", ); @ISA = qw(Net::Cmd IO::Socket::INET); #======================================================================= # # new() # # constructor - open connection to host, get a list of databases, # and send CLIENT identification command. # #======================================================================= sub new { @_ > 1 or croak 'usage: Net::Dict->new() takes at least a HOST name'; my $class = shift; my $host = shift; int(@_) % 2 == 0 or croak 'Net::Dict->new(): odd number of arguments'; my %inargs = @_; my $self; my $argref; return undef unless defined $host; #------------------------------------------------------------------- # Process arguments, setting defaults if needed #------------------------------------------------------------------- $argref = {}; foreach my $arg (keys %ARG_DEFAULT) { $argref->{$arg} = exists $inargs{$arg} ? $inargs{$arg} : $ARG_DEFAULT{$arg}; delete $inargs{$arg}; } if (keys(%inargs) > 0) { croak "Net::Dict->new(): unknown argument - ", join(', ', keys %inargs); } #------------------------------------------------------------------- # Make the connection #------------------------------------------------------------------- $self = $class->SUPER::new(PeerAddr => $host, PeerPort => $argref->{Port}, Proto => 'tcp', Timeout => $argref->{Timeout} ); return undef unless defined $self; ${*$self}{'net_dict_host'} = $host; $self->autoflush(1); $self->debug($argref->{Debug}); if ($self->response() != CMD_OK) { $self->close(); return undef; } # parse the initial 220 response $self->_parse_banner($self->message); #------------------------------------------------------------------- # Send the CLIENT command which identifies the connecting client #------------------------------------------------------------------- $self->_CLIENT($argref->{Client}); #------------------------------------------------------------------- # The default - search ALL dictionaries #------------------------------------------------------------------- $self->setDicts('*'); return $self; } sub dbs { @_ == 1 or croak 'usage: $dict->dbs() - takes no arguments'; my $self = shift; $self->_get_database_list(); return %{${*$self}{'net_dict_dbs'}}; } sub setDicts { my $self = shift; @{${*$self}{'net_dict_userdbs'}} = @_; } sub serverInfo { @_ == 1 or croak 'usage: $dict->serverInfo()'; my $self = shift; return 0 unless $self->_SHOW_SERVER(); my $info = join('', @{$self->read_until_dot}); $self->getline(); $info; } sub dbInfo { @_ == 2 or croak 'usage: $dict->dbInfo($dbname) - one argument only'; my $self = shift; if ($self->_SHOW_INFO(@_)) { return join('', @{$self->read_until_dot()}); } else { return undef; } } sub dbTitle { @_ == 2 or croak 'dbTitle() method expects one argument - DB name'; my $self = shift; my $dbname = shift; $self->_get_database_list(); if (exists ${${*$self}{'net_dict_dbs'}}{$dbname}) { return ${${*$self}{'net_dict_dbs'}}{$dbname}; } else { carp 'dbTitle(): unknown database name' if $self->debug; return undef; } } sub strategies { @_ == 1 or croak 'usage: $dict->strategies()'; my $self = shift; return 0 unless $self->_SHOW_STRAT(); my(%strats, $name, $desc); foreach (@{$self->read_until_dot()}) { ($name, $desc) = (split /\s/, $_, 2); chomp $desc; $strats{$name} = _unquote($desc); } $self->getline(); %strats; } sub define { @_ >= 2 or croak 'usage: $dict->define($word [, @dbs]) - takes at least one argument'; my $self = shift; my $word = shift; my @dbs = (@_ > 0) ? @_ : @{${*$self}{'net_dict_userdbs'}}; croak 'select some dictionaries with setDicts or supply as argument to define' unless @dbs; my($db, @defs); #------------------------------------------------------------------- # check whether we got an empty word #------------------------------------------------------------------- if (!defined($word) || $word eq '') { carp "empty word passed to define() method"; return undef; } foreach $db (@dbs) { next unless $self->_DEFINE($db, $word); my ($defNum) = ($self->message =~ /^\d{3} (\d+) /); foreach (0..$defNum-1) { my ($d) = ($self->getline =~ /^\d{3} ".*" (\w+) /); my ($def) = join '', @{$self->read_until_dot}; push @defs, [$d, $def]; } $self->getline(); } \@defs; } sub match { @_ >= 3 or croak 'usage: $self->match($word, $strat [, @dbs]) - takes at least two arguments'; my $self = shift; my $word = shift; my $strat = shift; my @dbs = (@_ > 0) ? @_ : @{${*$self}{'net_dict_userdbs'}}; croak 'define some dictionaries by setDicts or supply as argument to define' unless @dbs; my ($db, @matches); #------------------------------------------------------------------- # check whether we got an empty pattern #------------------------------------------------------------------- if (!defined($word) || $word eq '') { carp "empty pattern passed to match() method"; return undef; } foreach $db (@dbs) { next unless $self->_MATCH($db, $strat, $word); my ($db, $w); foreach (@{$self->read_until_dot}) { ($db, $w) = split /\s/, $_, 2; chomp $w; push @matches, [$db, _unquote($w)]; } $self->getline(); } \@matches; } sub auth { @_ == 3 or croak 'usage: $dict->auth() - takes two arguments'; my $self = shift; my $user = shift; my $pass_phrase = shift; my $auth_string; my $string; my $ctx; require Digest::MD5; $string = $self->msg_id().$pass_phrase; $auth_string = Digest::MD5::md5_hex($string); if ($self->_AUTH($user, $auth_string)) { #--------------------------------------------------------------- # clear the cache of database names # next time a method needs them, this will cause us to go # back to the server, and thus pick up any AUTH-restricted DBs #--------------------------------------------------------------- delete ${*$self}{'net_dict_dbs'}; } else { carp "auth() failed with error code ".$self->code() if $self->debug(); return; } } sub status { @_ == 1 or croak 'usage: $dict->status() - takes no arguments'; my $self = shift; my $message; $self->_STATUS() || return 0; chomp($message = $self->message); $message =~ s/^\d{3} //; return $message; } sub capabilities { @_ == 1 or croak 'usage: $dict->capabilities() - takes no arguments'; my $self = shift; return @{ ${*$self}{'net_dict_capabilities'} }; } sub has_capability { @_ == 2 or croak 'usage: $dict->has_capability() - takes one argument'; my $self = shift; my $cap = shift; return grep(lc($cap) eq $_, $self->capabilities()); } sub msg_id { @_ == 1 or croak 'usage: $dict->msg_id() - takes no arguments'; my $self = shift; return ${*$self}{'net_dict_msgid'}; } sub _DEFINE { shift->command('DEFINE', map { '"'.$_.'"' } @_)->response() == CMD_INFO } sub _MATCH { shift->command('MATCH', map { '"'.$_.'"' } @_)->response() == CMD_INFO } sub _SHOW_DB { shift->command('SHOW DB')->response() == CMD_INFO } sub _SHOW_STRAT { shift->command('SHOW STRAT')->response() == CMD_INFO } sub _SHOW_INFO { shift->command('SHOW INFO', @_)->response() == CMD_INFO } sub _SHOW_SERVER { shift->command('SHOW SERVER')->response() == CMD_INFO } sub _CLIENT { shift->command('CLIENT', @_)->response() == CMD_OK } sub _STATUS { shift->command('STATUS')->response() == CMD_OK } sub _HELP { shift->command('HELP')->response() == CMD_INFO } sub _QUIT { shift->command('QUIT')->response() == CMD_OK } sub _OPTION_MIME { shift->command('OPTION MIME')->response() == CMD_OK } sub _AUTH { shift->command('AUTH', @_)->response() == CMD_OK } sub _SASLAUTH { shift->command('SASLAUTH', @_)->response() == CMD_OK } sub _SASLRESP { shift->command('SASLRESP', @_)->response() == CMD_OK } sub quit { my $self = shift; $self->_QUIT; $self->close; } sub DESTROY { my $self = shift; if (defined fileno($self)) { $self->quit; } } sub response { my $self = shift; my $str = $self->getline() || return undef; if ($self->debug) { $self->debug_print(0,$str); } my($code) = ($str =~ /^(\d+) /); ${*$self}{'net_cmd_resp'} = [ $str ]; ${*$self}{'net_cmd_code'} = $code; substr($code,0,1); } #======================================================================= # # _unquote # # Private function used to remove quotation marks from around # a string. # #======================================================================= sub _unquote { my $string = shift; if ($string =~ /^"/) { $string =~ s/^"//; $string =~ s/"$//; } return $string; } #======================================================================= # # _parse_banner # # Parse the initial response banner the server sends when we connect. # Hoping for: # 220 blah blah <auth.mime> <msgid> # The <auth.mime> string gives a list of supported extensions. # The last bit is a msg-id, which identifies this connection, # and is used in authentication, for example. # #======================================================================= sub _parse_banner { my $self = shift; my $banner = shift; my ($code, $capstring, $msgid); ${*$self}{'net_dict_banner'} = $banner; ${*$self}{'net_dict_capabilities'} = []; if ($banner =~ /^(\d{3}) (.*) (<[^<>]*>)?\s+(<[^<>]+>)\s*$/) { ${*$self}{'net_dict_msgid'} = $4; ($capstring = $3) =~ s/[<>]//g; if (length($capstring) > 0) { ${*$self}{'net_dict_capabilities'} = [split(/\./, $capstring)]; } } else { carp "unexpected format for welcome banner on connection:\n", $banner if $self->debug; } } #======================================================================= # # _get_database_list # # Get the list of databases on the remote server. # We cache them in the instance data object, so that dbTitle() # and databases() don't have to go to the server every time. # # We check to see whether we've already got the databases first, # and do nothing if so. This means that this private method # can just be invoked in the public methods. # #======================================================================= sub _get_database_list { my $self = shift; return if exists ${*$self}{'net_dict_dbs'}; if ($self->_SHOW_DB) { my($dbNum)= ($self->message =~ /^\d{3} (\d+)/); my($name, $descr); foreach (0..$dbNum-1) { ($name, $descr) = (split /\s/, $self->getline, 2); chomp $descr; ${${*$self}{'net_dict_dbs'}}{$name} = _unquote($descr); } # Is there a way to do it right? Reading the dot line and the # status line afterwards? Maybe I should use read_until_dot? $self->getline(); $self->getline(); } } #----------------------------------------------------------------------- # Method aliases for backwards compatibility #----------------------------------------------------------------------- *strats = \&strategies; 1; __END__ =head1 NAME Net::Dict - client API for accessing dictionary servers (RFC 2229) =head1 SYNOPSIS use Net::Dict; $dict = Net::Dict->new('dict.server.host'); $h = $dict->define("word"); foreach $i (@{$h}) { ($db, $def) = @{$i}; . . . } =head1 DESCRIPTION C<Net::Dict> is a perl class for looking up words and their definitions on network dictionary servers. C<Net::Dict> provides a simple DICT client API for the network protocol described in RFC2229. Quoting from that RFC: =over =item The Dictionary Server Protocol (DICT) is a TCP transaction based query/response protocol that allows a client to access dictionary definitions from a set of natural language dictionary databases. =back An instance of Net::Dict represents a connection to a single DICT server. For example, to connect to the dictionary server at C<dict.org>, you would write: $dict = Net::Dict->new('dict.org'); A DICT server can provide any number of dictionaries, which are referred to as I<databases>. Each database has a I<name> and a I<title>. The name is a short identifier, typically just one word, used to refer to that database. The title is a brief one-line description of the database. For example, at the time of writing, the C<dict.org> server has 11 databases, including a version of Webster's dictionary from 1913. The name of the database is I<web1913>, and the title is I<Webster's Revised Unabridged Dictionary (1913)>. To look up definitions for a word, you use the C<define> method: $dref = $dict->define('banana'); This returns a reference to a list; each entry in the list is a reference to a two item list: [ $dbname, $definition ] The first entry is a I<database name> as introduced above. The second entry is the text of a definition from the specified dictionary. =head2 MATCHING WORDS In addition the looking up word definitions, you can lookup a list of words which match a given pattern, using the B<match()> method. Each DICT server typically supports a number of I<strategies> which can be used to match words against a pattern. For example, using B<prefix> strategy with a pattern "anti" would find all words in databases which start with "anti": @mref = $dict->match('anti', 'prefix'); foreach my $match (@{ $mref }) { ($db, $word) = @{ $match }; } Similarly the B<suffix> strategy is used to search for words which end in a given pattern. The B<strategies()> method is used to request a list of supported strategies - see L<"METHODS"> for more details. =head2 SELECTING DATABASES By default Net::Dict will look in all databases on the DICT server. This is specified with a special database name of C<*>. You can specify the database(s) to search explicitly, as additional arguments to the C<define> method: $dref = $dict->define('banana', 'wn', 'web1913'); Rather than specify the databases to use every time, you can change the default from '*' using the C<setDicts> method: $dict->setDicts('wn', 'web1913'); Any subsequent calls to C<define> will refer to these databases, unless over-ridden with additional arguments to C<define>. You can find out what databases are available on a server using the C<dbs> method: %dbhash = $dict->dbs(); Each entry in the returned hash has the name of a database as the key, and the corresponding title as the value. There is another special database name - C<!> - which says that all databases should be searched, but as soon as a definition is found, no further databases should be searched. =head1 CONSTRUCTOR $dict = Net::Dict->new (HOST [,OPTIONS]); This is the constructor for a new Net::Dict object. C<HOST> is the name of the remote host on which a Dict server is running. This is required, and must be an explicit host name. B<Note:> previous versions let you give an empty string for the hostname, resulting in selection of default hosts. This behaviour is no longer supported. C<OPTIONS> are passed in a hash like fashion, using key and value pairs. Possible options are: =over 4 =item B<Port> The port number to connect to on the remote machine for the Dict connection (a default port number is 2628, according to RFC2229). =item B<Client> The string to send as the CLIENT identifier. If not set, then a default identifier for Net::Dict is sent. =item B<Timeout> Sets the timeout for the connection, in seconds. Defaults to 120. =item B<Debug> The debug level - a non-zero value will resulting in debugging information being generated, particularly when errors occur. Can be changed later using the C<debug> method, which is inherited from Net::Cmd. More on the debug method can be found in L<Net::Cmd>. =back Making everything explicit, here's how you might call the constructor in your client: $dict = Net::Dict->new($HOST, Port => 2628, Client => "myclient v$VERSION", Timeout => 120, Debug => 0); This will return C<undef> if we failed to make the connection. It will C<die> if bad arguments are passed: no hostname, unknown argument, etc. =head1 METHODS Unless otherwise stated all methods return either a I<true> or I<false> value, with I<true> meaning that the operation was a success. When a method states that it returns a value, failure will be returned as I<undef> or an empty list. =head2 define ( $word [, @dbs] ) returns a reference to an array, whose members are lists, consisting of two elements: the dictionary name and the definition. If no dictionaries are specified, those set by setDicts() are used. =head2 match ( $pattern, $strategy [, @dbs] ) Looks for words which match $pattern according to the specified matching $strategy. Returns a reference to an array, each entry of which is a reference to a two-element array: database name, matching word. =head2 dbs Returns a hash with information on the databases available on the DICT server. The keys are the short names, or identifiers, of the databases; the value is title of the database: %dbhash = $dict->dbs(); print "Available dictionaries:\n"; while (($db, $title) = each %dbhash) { print "$db : $title\n"; } This is the C<SHOW DATABASES> command from RFC 2229. =head2 dbInfo ( $dbname ) Returns a string, containing description of the dictionary $dbname. =head2 setDicts ( @dicts ) Specify the dictionaries that will be searched during the successive define() or match() calls. Defaults to '*'. No existance checks are performed by this interface, so you'd better make sure the dictionaries you specify are on the server (e.g. by calling dbs()). =head2 strategies returns an array, containing an ID of a matching strategy as a key and a verbose description as a value. This method was previously called strats(); that name for the method is also currently supported, for backwards compatibility. =head2 auth ( $USER, $PASSPHRASE ) Attempt to authenticate the specified user, using the scheme described on page 18 of RFC 2229. The user should be known to the server, and $PASSPHRASE is a shared secret known only to the server and the user. For example, if you were using dictd from dict.org, your configuration file might include the following: database private { data "/usr/local/dictd/db/private.dict.dz" index "/usr/local/dictd/db/private.index" access { user connor } } user connor "there can be only one" To be able to access this database, you'd write something like the following: $dict = Net::Dict->new('dict.foobar.com'); $dict->auth('connor', 'there can be only one'); A subsequent call to the C<databases> method would reveal the C<private> database now accessible. Not all servers support the AUTH extension; you can check this with the has_capability() method, described below. =head2 serverInfo Returns a string, containing the information about the server, provided by the server: print "Server Info:\n"; print $dict->serverInfo(), "\n"; This is the C<SHOW SERVER> command from RFC 2229. =head2 dbTitle ( $DBNAME ) Returns the title string for the specified database. This is the same string returned by the C<dbs()> method for all databases. =head2 capabilities Returns a list of the capabilities supported by the DICT server, as described on pages 7 and 8 of RFC 2229. =head2 has_capability ( $cap_name ) Returns true (non-zero) if the DICT server supports the specified capability; false (zero) otherwise. Eg if ($dict->has_capability('auth')) { $dict->auth('genie', 'open sesame'); } =head2 status Send the STATUS command to the DICT server, which will return some server-specific timing or debugging information. This may be useful when debugging or tuning a DICT server, but probably won't be of interest to most users. =head1 KNOWN BUGS AND LIMITATIONS =over 4 =item * The following DICT commands are not currently supported: OPTION MIME =item * No support for firewalls at the moment. =item * Site-wide configuration isn't supported. Previous documentation suggested that it was. =item * Currently no way to specify that results of define and match should be in HTML. This was also previously a config option for the constructor, but it didn't do anything. =back =head1 REPORTING BUGS When reporting bugs/problems please include as much information as possible. It may be difficult for me to reproduce the problem as almost every setup is different. A small script which yields the problem will probably be of help. It would also be useful if this script was run with the extra options C<Debug =E<gt> 1> passed to the constructor, and the output sent with the bug report. If you cannot include a small script then please include a Debug trace from a run of your program which does yield the problem. =head1 EXAMPLES The B<examples> directory of the Net-Dict distribution includes C<simple.pl>, which illustrates basic use of the module. The distribution also includes two example DICT clients: B<dict> is a basic command-line client, and B<tkdict> is a GUI-based client, created using Perl/Tk. =head1 SEE ALSO =over 4 =item RFC 2229 The internet document which defines the DICT protocol. http://www.cis.ohio-state.edu/htbin/rfc/rfc2229.html =item Net::Cmd A module which provides methods for a network command class, such as Net::FTP, Net::SMTP, as well as Net::Dict. Part of the libnet distribution, available from CPAN. =item dictd(8) The reference DICT server, available from B<dict.org>. =item dict(1) The sample client, written in C, which comes with dictd. =item http://www.dict.org/ The home page for the DICT effort; has links to other resources, including other libraries and clients. =back =head1 AUTHOR Net::Dict was written by Dmitry Rubinstein E<lt>dimrub@wisdom.weizmann.ac.ilE<gt>, using Net::FTP and Net::SMTP as a pattern and a model for imitation. The module is now maintained by Neil Bowers E<lt>neilb@cre.canon.co.ukE<gt> =head1 COPYRIGHT Copyright (C) 2001 Canon Research Centre Europe, Ltd. Copyright (c) 1998 Dmitry Rubinstein. All rights reserved. This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut