chclass.sa


Generated by gen_html_sa_files from ICSI. Contact gomes@icsi.berkeley.edu for details
 
------------------------->  GNU Sather - sourcefile  <-------------------------
-- Copyright (C) 2000 by K Hopper, University of Waikato, New Zealand        --
-- This file is part of the GNU Sather library. It is free software; you may --
-- redistribute  and/or modify it under the terms of the GNU Library General --
-- Public  License (LGPL)  as published  by the  Free  Software  Foundation; --
-- either version 2 of the license, or (at your option) any later version.   --
-- This  library  is distributed  in the  hope that it will  be  useful, but --
-- WITHOUT ANY WARRANTY without even the implied warranty of MERCHANTABILITY --
-- or FITNESS FOR A PARTICULAR PURPOSE. See Doc/LGPL for more details.       --
-- The license text is also available from:  Free Software Foundation, Inc., --
-- 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA                     --
-------------->  Please email comments to <bug-sather@gnu.org>  <--------------


immutable class CHAR_CLASS < $ENUMS{CHAR_CLASS}

immutable class CHAR_CLASS < $ENUMS{CHAR_CLASS} is
   -- This class defines the character classes which are specified in
   -- the extended regular expression grammar (see ISO/IEC 9945-2) as amended
   -- by ISO/IEC 14652.
   --
   --   The corresponding message file should contain the following
   -- strings in the current cultural representation and encoding -
   --
   --        alpha                -- classes defined as in ISO/IEC 14652
   --        blank
   --        cntrl
   --        digit
   --        outdigit
   --        graph
   --        lower
   --        print
   --        punct
   --        space
   --        num_terminator
   --        upper
   --        xdigit
   --        left_to_right
   --        right_to_left
   --        num_separator
   --        segment_separator
   --        block_separator
   --        direction_control
   --        sym_swap_layout
   --        char_shape_selector
   --        num_shape_selector
   --        non_spacing
   --        non_spacing_level3
   --        normal_connect
   --        r_connect
   --        no_connect
   --        no_connect_space
   --        vowel_connect
   --        special1
   --        special2
   --        special3
   --        xdigit
   --        numeric              -- NOT in 14652
   --        combining            -- NOT in 14652
   --
   -- NOTE(review): xdigit is listed twice above, while the enumeration
   -- below has a Terminator value with no obviously matching string.
   -- Presumably one of the two entries should name that class - confirm
   -- against the message file before changing the list.

   -- Version 1.2 Oct 98.  Copyright K Hopper, U of Waikato
   -- Development History
   -- -------------------
   --        Date         Who By       Detail
   --        ----         ------       ------
   --      30 May 97        kh     Original for Extended Regular_Expressions
   --      19 Feb 98        kh     Updated for ISO/IEC 14652 changes.
   --      30 Oct 98        kh     Refined, added pre/post conditions

   include EXACT_ENUM{CHAR_CLASS} ;

   -- Number of enumeration values defined below (enum(1) .. enum(35)).
   private const val_count : CARD := 35 ;

   -- The next routines provide the enumeration itself.

   Alphabetic : SAME is return enum(1) end ;
   Blank : SAME is return enum(2) end ;
   Block_Separator : SAME is return enum(3) end ;
   Char_Shape_Selector : SAME is return enum(4) end ;
   Combining : SAME is return enum(5) end ;
   Control : SAME is return enum(6) end ;
   Digit : SAME is return enum(7) end ;
   Direction_Control : SAME is return enum(8) end ;
   Graphic : SAME is return enum(9) end ;
   Hex_Digit : SAME is return enum(10) end ;
   Lower_Case : SAME is return enum(11) end ;
   Left_to_Right : SAME is return enum(12) end ;
   No_Connect : SAME is return enum(13) end ;
   No_Connect_Space : SAME is return enum(14) end ;
   Non_Spacing : SAME is return enum(15) end ;
   Non_Spacing_Level3 : SAME is return enum(16) end ;
   Normal_Connect : SAME is return enum(17) end ;
   Numeric : SAME is return enum(18) end ;
   Numeric_Separator : SAME is return enum(19) end ;
   Num_Shape_Selector : SAME is return enum(20) end ;
   Numeric_Terminator : SAME is return enum(21) end ;
   Out_Digit : SAME is return enum(22) end ;
   Printable : SAME is return enum(23) end ;
   Punctuation : SAME is return enum(24) end ;
   Right_Connect : SAME is return enum(25) end ;
   Right_to_Left : SAME is return enum(26) end ;
   Segment_Separator : SAME is return enum(27) end ;
   Space : SAME is return enum(28) end ;
   Special_1 : SAME is return enum(29) end ;
   Special_2 : SAME is return enum(30) end ;
   Special_3 : SAME is return enum(31) end ;
   Sym_Swap_Layout : SAME is return enum(32) end ;
   Terminator : SAME is return enum(33) end ;
   Upper_Case : SAME is return enum(34) end ;
   Vowel_Connect : SAME is return enum(35) end ;

   private code_kind(
                     ch_code : CHAR_CODE,
                     cult : CULTURE
                     ) : SAME
      pre ~void(cult)
      post true
   is
      -- This routine returns one of the character classes to which ch_code
      -- belongs, provided that it is a member of a defined class - otherwise
      -- void.  If ch_code belongs to more than one class then the result is
      -- non-deterministic being culture-dependent : the first class yielded
      -- by the culture's class map whose group contains ch_code is returned.

      charclasses : FMAP{CHAR_CLASS,CHAR_GROUP} := cult.char_data.classes ;
      loop
         loc_key : SAME := charclasses.keys! ;
         if charclasses.get(loc_key).contains(ch_code) then
            return loc_key
         end
      end ;
      return void
   end ;

   kind(
        ch : CHAR,
        cult : CULTURE
        ) : SAME
      pre ~void(cult)
          and (cult.kind.size = 1)
      post true
   is
      -- This routine returns a character class of which ch is a member in
      -- the given culture, or void if it belongs to no defined class.
      -- The class is whichever one code_kind finds first; where ch belongs to
      -- several classes no particular one (e.g. the most restrictive) is
      -- guaranteed.

      return code_kind(ch.code,cult)
   end ;

   kind(
        ch : CHAR
        ) : SAME
   is
      -- This routine returns a character class of which ch is a member using
      -- the default current culture, or void if it belongs to no defined
      -- class.  As for the two-argument version the choice between several
      -- matching classes is made by code_kind and is not guaranteed.

      return code_kind(ch.code,CULTURE::default)
   end ;

   is_a(
        ch : CHAR,
        test_class : SAME,
        culture : CULTURE
        ) : BOOL
      pre ~test_class.is_nil
          and ~void(culture)
      post true
   is
      -- This routine returns true if and only if ch is a member of the test
      -- class (providing it exists!), otherwise false.  A class with no
      -- entry in the culture's class map has no members.

      loc_grp : CHAR_GROUP := culture.char_data.classes.get(test_class) ;
      if void(loc_grp) then
         return false
      else
         return loc_grp.contains(ch.code)
      end
   end ;

   is_a(
        ch : CHAR,
        test_class : SAME
        ) : BOOL
      pre ~test_class.is_nil
      post true
   is
      -- This routine returns true if and only if ch is a member of the test
      -- class (providing it exists!) using the default repertoire and encoding,
      -- otherwise false.
      --
      -- Delegates to the three-argument version (as contains does) so that
      -- the membership test is written in one place only.

      return is_a(ch,test_class,CULTURE::default)
   end ;

   contains(
            ch : CHAR,
            lib : LIBCHARS
            ) : BOOL
      pre ~is_nil
          and ~void(lib)
      post true
   is
      -- This routine returns true if and only if ch is a member of the class
      -- indicated by self, using the culture of the given repertoire and
      -- encoding.

      return is_a(ch,self,lib.culture)
   end ;

   contains(
            ch : CHAR
            ) : BOOL
      pre ~is_nil
      post true
   is
      -- This routine returns true if and only if ch is a member of the class
      -- indicated by self, using the default culture.

      return is_a(ch,self,CULTURE::default)
   end ;

end ; -- CHAR_CLASS

immutable class CHAR_MAPPINGS < $ENUMS{CHAR_MAPPINGS}

immutable class CHAR_MAPPINGS < $ENUMS{CHAR_MAPPINGS} is
   -- This class defines the character mappings which are specified in
   -- ISO/IEC 14652.
   --
   --   The corresponding message file should contain the following
   -- strings in the current cultural representation and encoding -
   --
   --        toupper
   --        tolower
   --        tosymmetric

   -- Version 1.1 Oct 98.  Copyright K Hopper, U of Waikato
   -- Development History
   -- -------------------
   --        Date         Who By       Detail
   --        ----         ------       ------
   --      23 Feb 98        kh     Original from ISO/IEC 14652.
   --      30 Oct 98        kh     Refined, added pre/post conditions

   include EXACT_ENUM{CHAR_MAPPINGS} ;

   -- Number of enumeration values defined below (enum(1) .. enum(3)).
   private const val_count : CARD := 3 ;

   -- The next routines provide the enumeration itself.

   To_Upper : SAME is return enum(1) end ;
   To_Lower : SAME is return enum(2) end ;
   To_Symmetric : SAME is return enum(3) end ;

   is_mapped(
             ch : CHAR,
             cult : CULTURE
             ) : BOOL
      pre ~void(cult)
      post true
   is
      -- This predicate returns true if and only if ch is in the range or
      -- domain of the mapping indicated by self as provided for the given
      -- culture.  If the culture provides no such mapping then nothing is
      -- mapped and false is returned.

      loc_map : CHAR_MAP := cult.char_data.mappings.get(self) ;
      if void(loc_map) then         -- no mapping of this kind in the culture
         return false
      end ;
      loc_lib : LIBCHARS := cult.sather_lib ;
      loc_ch_code : CHAR_CODE := CHAR_CODE::create(ch,loc_lib) ;
      return loc_map.is_mapped(loc_ch_code)
   end ;

   to_domain(
             ch : CHAR,
             cult : CULTURE
             ) : CHAR
      pre ~void(cult)
      post true -- and the mapping has been done
   is
      -- This routine returns the value of ch as mapped by the mapping
      -- specified by self using the given repertoire and encoding.
      --
      -- NOTE(review): the mapping looked up for self is used without a void
      -- test - presumably callers check is_mapped first; confirm.

      loc_map : CHAR_MAP := cult.char_data.mappings.get(self) ;
      loc_lib : LIBCHARS := cult.sather_lib ;
      res : CHAR_CODE := loc_map.to_domain(CHAR_CODE::create(ch,loc_lib)) ;
      return res.char
   end ;

   to_domain(
             ch : CHAR
             ) : CHAR
      pre true
      post true -- and the mapping has been done
   is
      -- This routine returns the value of ch as mapped by the mapping
      -- specified by self using the default repertoire and encoding.

      return to_domain(ch,CULTURE::default)
   end ;

   to_range(
            ch : CHAR,
            cult : CULTURE
            ) : CHAR
      pre ~void(cult)
      post true -- and the mapping has been done
   is
      -- This routine returns the value of ch as mapped by the inverse mapping
      -- specified by self using the given repertoire and encoding.
      --
      -- NOTE(review): the mapping looked up for self is used without a void
      -- test - presumably callers check is_mapped first; confirm.

      loc_map : CHAR_MAP := cult.char_data.mappings.get(self) ;
      loc_lib : LIBCHARS := cult.sather_lib ;
      return loc_map.to_range(CHAR_CODE::create(ch,loc_lib)).char
   end ;

   to_range(
            ch : CHAR
            ) : CHAR
   is
      -- This routine returns the value of ch as mapped by the inverse mapping
      -- specified by self using the default repertoire and encoding.

      return to_range(ch,CULTURE::default)
   end ;

end ; -- CHAR_MAPPINGS

class CHAR_TYPES < $BINARY

class CHAR_TYPES < $BINARY is
   -- This class defines the internationalisation components of character
   -- classification and mapping data as defined in ISO/IEC 14652 (as may be
   -- amended).
   --
   --   The following are the LC_CTYPE components at the time of writing :-
   --
   --        character classes (see also the class CHAR_CLASS)
   --
   --        character mappings (see also the class CHAR_MAPPINGS)
   --
   --        a table of synonyms

   -- Version 1.0 Jun 98.  Copyright K Hopper, U of Waikato
   -- Development History
   -- -------------------
   --        Date         Who By       Detail
   --        ----         ------       ------
   --      26 Jun 98        kh     Original

   include BINARY ;

   readonly attr classes : FMAP{CHAR_CLASS,CHAR_GROUP} ;     -- class -> members
   readonly attr mappings : FMAP{CHAR_MAPPINGS,CHAR_MAP} ;   -- mapping kind -> map
   readonly attr trans_table : FMAP{STR,STR} ;               -- synonym table

   create : SAME is
      -- This routine creates a new object and sets the components to empty.

      me : SAME := new ;
      me.classes := FMAP{CHAR_CLASS,CHAR_GROUP}::create ;
      me.mappings := FMAP{CHAR_MAPPINGS,CHAR_MAP}::create ;
      me.trans_table := FMAP{STR,STR}::create ;
      return me
   end ;

   insert(
          new_class : CHAR_CLASS,
          new_group : CHAR_GROUP
          )
      pre ~new_class.is_nil
          and ~void(new_group)
      post classes.test(new_class)
           -- and (classes.get(new_class) = new_group)
   is
      -- This routine inserts a new class into the character information,
      -- irrespective of whether there is a class of the given type (when the
      -- entry will be over-written) or not.

      classes := classes.insert(new_class,new_group)
   end ;

   delete(
          old_class : CHAR_CLASS
          )
      pre ~void(self)
          and ~old_class.is_nil
      post ~classes.test(old_class)
   is
      -- This routine ensures that there is no entry for the given class name
      -- in the classes map.

      if classes.test(old_class) then -- something to delete
         classes := classes.delete(old_class)
      end
   end ;

   insert(
          new_map : CHAR_MAPPINGS,
          new_mapping : CHAR_MAP
          )
      pre ~new_map.is_nil
          and ~void(new_mapping)
      post mappings.test(new_map)
           -- and (mappings.get(new_map) = new_mapping)
   is
      -- This routine inserts a new mapping into the character information,
      -- irrespective of whether there is a mapping of the given kind (when
      -- the entry will be over-written) or not.

      mappings := mappings.insert(new_map,new_mapping)
   end ;

   delete(
          old_mapping : CHAR_MAPPINGS
          )
      pre ~void(self)
          and ~old_mapping.is_nil
      post ~mappings.test(old_mapping)
   is
      -- This routine ensures that there is no entry for the given mapping
      -- kind in the mappings map.

      if mappings.test(old_mapping) then -- something to delete
         mappings := mappings.delete(old_mapping)
      end
   end ;

   new_synonym(
               range : STR,
               domain : STR
               )
      pre ~void(range)
          and ~void(domain)
          and (range.index_lib = domain.index_lib)
      post trans_table.test(range)
           and (trans_table.get(range) = domain)
   is
      -- This routine inserts a new synonym into the table, replacing any
      -- former synonym if already present.

      trans_table := trans_table.insert(range,domain)
   end ;

   build(
         str : BIN_CURSOR
         ) : SAME
      pre ~void(str)
          and ~str.is_done
      post ~void(result)
           or (void(result)
               and (str.index = initial(str.index)))
   is
      -- This routine builds an object of this class from the binary string
      -- given in the cursor.  The string is read as a one octet count of
      -- classes followed by that many class/group entries, then a one octet
      -- count of mappings followed by that many mapping/map entries.  If
      -- either section cannot be read the cursor is put back where it
      -- started and void is returned.
      --
      -- NOTE(review): binstr also writes the synonym table after the
      -- mappings, but nothing here reads it back, so trans_table of the
      -- result is always empty - confirm whether that is intended.

      loc_cnt : CARD ;
      start_index : CARD := str.index ;       -- for restoring on failure
      me : SAME := create ;

      loc_cnt := str.get_item.card ;          -- number of class entries
      loop
         loc_cnt.times! ;
         loc_grp : CHAR_CLASS := CHAR_CLASS::build(str) ;
         if str.is_done then
            break!
         end ;
         loc_entry : CHAR_GROUP := CHAR_GROUP::build(str) ;
         if str.is_done then
            break!
         end ;
         me.classes := me.classes.insert(loc_grp,loc_entry)
      end ;

      if str.is_done
            or (str.card /= CARD::nil) then
         str.set_index(start_index) ;
         return void
      end ;

      loc_cnt := str.get_item.card ;          -- number of mapping entries
      loop
         loc_cnt.times! ;
         loc_map : CHAR_MAPPINGS := CHAR_MAPPINGS::build(str) ;
         if str.is_done then
            break!
         end ;
         loc_entry : CHAR_MAP := CHAR_MAP::build(str) ;
         if str.is_done then
            break!
         end ;
         me.mappings := me.mappings.insert(loc_map,loc_entry)
      end ;

      if str.is_done
            or (str.card /= CARD::nil) then
         str.set_index(start_index) ;
         return void
      end ;

      return me
   end ;

   binstr : BINSTR
      pre ~void(self)
      post true -- should be - create(result) = self
   is
      -- This routine creates a binary string representation of self.  The
      -- layout is : one octet holding the number of classes, each class and
      -- its group; one octet holding the number of mappings, each mapping
      -- kind and its map; the size of the synonym table and then, for each
      -- synonym, the size and contents of the key followed by the size and
      -- contents of its target.

      res : BINSTR := BINSTR::create ;

      res := res + OCTET::create(classes.size) ;
      loop
         loc_grp : CHAR_CLASS := classes.keys! ;
         res := res + loc_grp.binstr + classes.get(loc_grp).binstr
      end ;

      res := res + OCTET::create(mappings.size) ;
      loop
         loc_map : CHAR_MAPPINGS := mappings.keys! ;
         res := res + loc_map.binstr + mappings.get(loc_map).binstr
      end ;

      res := res + trans_table.size.binstr ;
      loop
         -- Look the target up with the key exactly as stored in the table,
         -- rather than with a string rebuilt from its binary form.
         loc_key : STR := trans_table.keys! ;
         loc_str : BINSTR := loc_key.binstr ;
         target_str : BINSTR := trans_table.get(loc_key).binstr ;
         res := res + loc_str.size.binstr + loc_str
                    + target_str.size.binstr + target_str
      end ;

      return res
   end ;

end ; --CHAR_TYPES