repertoire.sa
Generated by gen_html_sa_files from ICSI. Contact gomes@icsi.berkeley.edu for details
-------------------------> GNU Sather - sourcefile <-------------------------
-- Copyright (C) 2000 by K Hopper, University of Waikato, New Zealand --
-- This file is part of the GNU Sather library. It is free software; you may --
-- redistribute and/or modify it under the terms of the GNU Library General --
-- Public License (LGPL) as published by the Free Software Foundation; --
-- either version 2 of the license, or (at your option) any later version. --
-- This library is distributed in the hope that it will be useful, but --
-- WITHOUT ANY WARRANTY without even the implied warranty of MERCHANTABILITY --
-- or FITNESS FOR A PARTICULAR PURPOSE. See Doc/LGPL for more details. --
-- The license text is also available from: Free Software Foundation, Inc., --
-- 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --
--------------> Please email comments to <bug-sather@gnu.org> <--------------
class REPERTOIRE
class REPERTOIRE is
-- This class implements the tables needed for use when carrying out
-- ordering in accordance with ISO/IEC 14651/2 standards. It permits
-- individual weights to be determined by some ordering mechanism, in
-- addition to providing for equality testing.
-- Version 1.4 Apr 99. Copyright K Hopper, U of Waikato
-- Development History
-- -------------------
-- Date Who By Detail
-- ---- ------ ------
-- 06 Dec 96 kh Original
-- 19 Feb 97 kh Modified to use CARD
-- 10 Apr 97 kh Added 'char' constants
-- 2 Jul 97 kh Separated 'consts' to LIBCHARS
-- 6 Apr 99 kh Modified for text class V8!
private shared priv_default : SAME ;
readonly attr charmap : REP_MAP ;
readonly attr maps : FLIST{ORDERING} ;
-- At repertoire creation time these maps are created, keyed on the
-- code independent character tokens, yielding an array of weights which is
-- to be handled in accordance with the ordering rules.
readonly attr undefined : RANGE_ORDERING ;
-- This provides a default order for a complete character code
-- repertoire, provided that a token is not found in any other rule!
Leaf_Name_ref : STR is
-- This routine creates and returns the name of the default file name "order"
-- in the Resources sub-directory as a reference string.
loc_lib : LIBCHARS := LIBCHARS::default ;
loc_res : CODE_STR := CODE_STR::create(loc_lib) +
CHAR_CODE::create(UNICODE::LATIN_SMALL_LETTER_O.card,loc_lib) +
CHAR_CODE::create(UNICODE::LATIN_SMALL_LETTER_R.card,loc_lib) +
CHAR_CODE::create(UNICODE::LATIN_SMALL_LETTER_D.card,loc_lib) +
CHAR_CODE::create(UNICODE::LATIN_SMALL_LETTER_E.card,loc_lib) +
CHAR_CODE::create(UNICODE::LATIN_SMALL_LETTER_R.card,loc_lib) ;
return loc_res.tgt_str
end ;
private build_rules(cursor : BIN_CURSOR) : ARRAY{ORDER_RULE}
pre ~void(cursor) and ~cursor.is_done
post true
is
--This private routine builds an order rule set from the binary string
-- indicated.
rule_cnt : CARD := cursor.get_item.card ;
res : ARRAY{ORDER_RULE} := ARRAY{ORDER_RULE}::create(rule_cnt) ;
loop
loc_index : CARD := 0.upto!(rule_cnt - 1) ;
res[loc_index] := ORDER_RULE::create(
BOOL::create(cursor.get_item),
BOOL::create(cursor.get_item))
end ;
return res
end ;
private build_range_map(index : BIN_CURSOR, loc_rules : ARRAY{ORDER_RULE}) : RANGE_ORDERING
pre ~void(index) and ~index.is_done
post ~void(result)
is
-- This routine builds a range ordering from the contents of the
-- file indicated by index.
loc_str : BINSTR := index.get_sized ; -- the initial char code.
loc_low : CARD := CARD::create(loc_str) ; -- treated as range low!
loc_base : ORDER_WEIGHT := ORDER_WEIGHT::build(index) ;
loc_high : CARD := loc_low + index.card - 1 ;
loc_range : RANGE := RANGE::create(loc_low,loc_high) ;
loc_wts : ARRAY{ORDER_WEIGHT} :=
ARRAY{ORDER_WEIGHT}::create(loc_rules.asize) ;
loop
loc_index : CARD := 0.upto!(loc_rules.size - 1) ;
loc_wts[loc_index] := ORDER_WEIGHT::build(index)
end ;
return RANGE_ORDERING::create(loc_range,loc_base,loc_rules,loc_wts)
end ;
private build_a_map(index : BIN_CURSOR) : ORDERING
pre ~void(index) and ~index.is_done
post ~void(result) -- or an exception has been raised
is
-- This routine builds a single ordering from the contents of the
-- file indicated by index.
loc_rules : ARRAY{ORDER_RULE} := build_rules(index) ;
res : ORDERING := ORDERING::create(loc_rules) ;
token_cnt : CARD := index.card ;
loc_wts : ARRAY{ORDER_WEIGHT} ;
loop
token_cnt.times! ;
loc_wts := ARRAY{ORDER_WEIGHT}::create(loc_rules.size) ;
loc_binstr : BINSTR := index.get_sized ;
tok : TOKEN := TOKEN::create(loc_binstr) ;
loop
loc_index : CARD := 0.upto!(loc_rules.size - 1) ;
loc_wts[loc_index] := ORDER_WEIGHT::build(index)
end ;
if ~res.insert(tok,loc_wts) then
SYS_ERROR::blind_error(self,tok.str,LIBCHARS::default)
end
end ;
range_cnt : CARD := index.card ;
loop
range_cnt.times! ;
if ~res.insert(build_range_map(index,loc_rules)) then
SYS_ERROR::blind_error(self,range_cnt.str,LIBCHARS::default)
end
end ;
return res
end ;
create(cult : CULTURE) : SAME
pre ~void(cult)
and ~void(cult.charmap)
and ~void(cult.resource_path)
post true
is
-- This creation routine treats name as the name of a file which contains
-- character encodings of a repertoire, together with synonym and ordering
-- information. This is known as a repertoire map in ISO/IEC 14651. The
-- repertoire file is expected to be a binary image, not the source text
-- which specifies the data.
if cult.state > cult.Charmap then
return cult.collating
end ;
me : SAME := new ;
me.charmap := cult.charmap ;
-- Now try to open the file, read it and unhatch the contents!
loc_path : FILE_PATH := cult.bin_resource_path.append(me.Leaf_Name_ref) ;
fyle : BIN_FILE := BIN_FILE::open_for_read(loc_path.str) ;
if void(fyle) then -- error reported in CULTURE!
return void
end ;
index : BIN_CURSOR := fyle.buffer.binstr.cursor ;
fyle.close ;
if index.get_item.card /= cult.sather_lib.my_size then
-- The wrong file!!
return void
end ;
if void(priv_default) then
priv_default := me
end ;
-- Now the character order maps.
me.maps := FLIST{ORDERING}::create ;
scripts : CARD := index.get_item.card ; -- No of different groups of rules
loop
scripts.times! ;
loc_order : ORDERING := me.build_a_map(index) ;
me.maps := me.maps.push(loc_order) ;
end ;
-- The undefined rules come at the end of the file!
undef_rules : ARRAY{ORDER_RULE} := me.build_rules(index) ;
me.undefined := me.build_range_map(index,undef_rules) ;
if ~index.is_done then
SYS_ERROR::blind_error(self,loc_path.str,LIBCHARS::default)
end ;
return me
end ;
private initialise : SAME is
-- This routine returns the default repertoire object by creating it!
cult : CULTURE := CULTURE::default ;
cult.collating.charmap := cult.charmap ;
return cult.collating
end ;
default : SAME is
-- This routine returns a default object - which may involve retrieving
-- data from the Operating System current culture specifications.
if void(priv_default) then
return initialise
else
return priv_default
end
end ;
private init : SAME is
-- This routine returns the current object or default if self is void.
if void(self) then
return default
else
return self
end
end ;
weights(tok : TOKEN) : TUP{ARRAY{ORDER_WEIGHT},ARRAY{ORDER_RULE}}
pre ~(tok.card = 0) and ~void(self)
post true
is
--This routine searches the tables as needed to find the weight array
-- corresponding to token, returning its value as a copy of the table
-- contents. If the token is not found then void is returned.
loc_code : CHAR_CODE := charmap.code(tok) ;
loc_list : FLIST{TOKEN} := charmap.token_list(loc_code) ;
loop -- over the individual maps
loc_order : ORDERING := maps.elt! ;
if loc_order.contains(loc_list) then
return TUP{ARRAY{ORDER_WEIGHT},ARRAY{ORDER_RULE}}::create(loc_order.weights(loc_list),loc_order.rule)
end
end ;
return TUP{ARRAY{ORDER_WEIGHT},ARRAY{ORDER_RULE}}::create(undefined.weights(tok),undefined.rules)
end ;
private make_tokens(str : CODE_STR) : FLIST{TOKEN}
pre true
post (result.size = str.size)
is
--This private routine returns a list of tokens which correspond to
-- the character codes in the argument string.
res : FLIST{TOKEN} := FLIST{TOKEN}::create ;
loop
loc_ch : CHAR_CODE := str.elt! ;
loc_tok : TOKEN := charmap.token(loc_ch) ;
res := res.push(loc_tok)
end ;
return res
end ;
private make_weights(toks : FLIST{TOKEN},out rules : ARRAY{ORDER_RULE}) : FLIST{ARRAY{ORDER_WEIGHT}}
pre ~void(toks)
post void(result)
or (result.size = toks.size)
is
--This routine retrieves the order weights corresponding to the token
-- list in the same order as that list.
res : FLIST{ARRAY{ORDER_WEIGHT}} := FLIST{ARRAY{ORDER_WEIGHT}}::create ;
loc_res : TUP{ARRAY{ORDER_WEIGHT},ARRAY{ORDER_RULE}} ;
rules := void ;
loop
loc_tok : TOKEN := toks.elt! ;
loc_res := weights(loc_tok) ;
if void(loc_res) then
return void
elsif void(rules) then
rules := loc_res.t2
elsif ~rules.equals(loc_res.t2) then -- impossible match wanted!
return void
end ;
res := res.push(loc_res.t1) ;
end ;
return res
end ;
private one_pass(first : FLIST{ARRAY{ORDER_WEIGHT}},second : FLIST{ARRAY{ORDER_WEIGHT}},index : CARD,rule : ORDER_RULE,for_equality : BOOL) : BOOL is
--This private predicate carries out a single pass through the two
-- arrays with the given index and direction - either for equality or first
-- being earlier than second. True is returned if and only if the required
-- relation is satisfied by the two lists.
loc_index : CARD ;
loop
if rule.left_to_right then
loc_index := 0.upto!(first.size - 1)
else
loc_index := (first.size - 1).downto!(0) ;
end ;
loc_left : ORDER_WEIGHT := first[loc_index][index] ;
loc_right : ORDER_WEIGHT := second[loc_index][index] ;
if ~(loc_left = loc_right) then
if for_equality then
return false
else
return (loc_left < loc_right)
end
end
end ;
return for_equality -- they are equal
end ;
private compare(first,second : CODE_STR,equality : BOOL) : BOOL is
--This predicate returns true if and only if first and second are
-- either equal (if equality is true) or first is less than second if
-- equality is false!
if equality -- a quick short-cut!!
and (first.size /= second.size) then
return false
end ;
res : SAME ;
if void(self) then
res := init
else
res := self
end ;
loc_rules_1 : ARRAY{ORDER_RULE} ;
loc_rules_2 : ARRAY{ORDER_RULE} ;
loc_first : FLIST{ARRAY{ORDER_WEIGHT}} :=
res.make_weights(res.make_tokens(first),out loc_rules_1) ;
loc_second : FLIST{ARRAY{ORDER_WEIGHT}} :=
res.make_weights(res.make_tokens(second),out loc_rules_2) ;
if void(loc_first)
or void(loc_second)
or ~loc_rules_1.equals(loc_rules_2) then -- incomparable!
return false
end ;
loc_res : BOOL ;
loop
index : CARD := 0.upto!(res.undefined.rules.size - 1) ;
loc_res := res.one_pass(loc_first,loc_second,index,
loc_rules_1.elt!,equality) ;
if (equality and ~loc_res)
or (~equality and loc_res) then
return loc_res
end
end ;
return equality
end ;
earlier(low,high : CHAR) : BOOL is
-- This predicate returns true if and only if first is earlier than
-- second in the collating sequence irrespective of their encoding.
return compare(CODE_STR::create(low.code),
CODE_STR::create(high.code),false)
end ;
earlier(low,high : STR) : BOOL is
-- This predicate returns true if and only if first is earlier in the
-- collating sequence than second, irrespective of their encoding.
return compare(CODE_STR::create(low),CODE_STR::create(high),false)
end ;
same(first,second : CHAR) : BOOL is
-- This predicate returns true if and only if first is earlier than
-- second in the collating sequence irrespective of their encoding.
-- In order to simplify encoding the individual characters are treated as
-- being strings of single characters.
return compare(CODE_STR::create(first.code),
CODE_STR::create(second.code),true)
end ;
same(first,second : STR) : BOOL is
-- This predicate returns true if and only if first and second are
-- synonymous characters irrespective of their encoding.
return compare(CODE_STR::create(first),CODE_STR::create(second),true)
end ;
earlier(low,high : RUNE) : BOOL is
-- This predicate returns true if and only if first is earlier than
-- second in the collating sequence irrespective of their encoding.
loc_low : CODE_STR := CODE_STR::create(low.lib) ;
loop
loc_low := loc_low + low.code!
end ;
loc_high : CODE_STR := CODE_STR::create(high.lib) ;
loop
loc_high := loc_high + high.code!
end ;
return compare(loc_low,loc_high,false)
end ;
earlier(low,high : RUNES) : BOOL is
-- This predicate returns true if and only if first is earlier in the
-- collating sequence than second, irrespective of their encoding.
loc_low : CODE_STR := CODE_STR::create(low.index_lib) ;
loop
loc_low := loc_low + low.code!
end ;
loc_high : CODE_STR := CODE_STR::create(high.index_lib) ;
loop
loc_high := loc_high + high.code!
end ;
return compare(loc_low,loc_high,false)
end ;
same(first,second : RUNE) : BOOL is
-- This predicate returns true if and only if first is earlier than
-- second in the collating sequence irrespective of their encoding.
-- In order to simplify encoding they individual characters are treated as
-- being strings of single characters.
loc_first : CODE_STR := CODE_STR::create(first.lib) ;
loop
loc_first := loc_first.push(first.code!)
end ;
loc_second : CODE_STR := CODE_STR::create(second.lib) ;
loop
loc_second := loc_second.push(second.code!)
end ;
return compare(loc_first,loc_second,true)
end ;
same(first,second : RUNES) : BOOL is
-- This predicate returns true if and only if first and second are
-- synonymous characters irrespective of their encoding.
loc_first : CODE_STR := CODE_STR::create(first.index_lib) ;
loop
loc_first := loc_first + first.code!
end ;
loc_second : CODE_STR := CODE_STR::create(second.index_lib) ;
loop
loc_second := loc_second + second.code!
end ;
return compare(loc_first,loc_second,true)
end ;
earlier(low,high : CHAR_CODE) : BOOL is
-- This predicate returns true if and only if first is earlier than
-- second in the collating sequence irrespective of their encoding.
return compare(CODE_STR::create(low),
CODE_STR::create(high),false)
end ;
earlier(low,high : CODE_STR) : BOOL is
-- This predicate returns true if and only if first is earlier in the
-- collating sequence than second, irrespective of their encoding.
return compare(low,high,false)
end ;
same(first,second : CHAR_CODE) : BOOL is
-- This predicate returns true if and only if first is the same character
-- irrespective of their encoding. In order to simplify encoding the
-- individual characters are treated as being strings of single characters.
return compare(CODE_STR::create(first),
CODE_STR::create(second),true)
end ;
same(first,second : CODE_STR) : BOOL is
-- This predicate returns true if and only if first and second are
-- synonymous characters irrespective of their encoding.
return compare(first,second,true)
end ;
end ; -- REPERTOIRE