runes.sa


Generated by gen_html_sa_files from ICSI. Contact gomes@icsi.berkeley.edu for details
 
------------------------->  GNU Sather - sourcefile  <-------------------------
-- Copyright (C) 2000 by K Hopper, University of Waikato, New Zealand        --
-- This file is part of the GNU Sather library. It is free software; you may --
-- redistribute  and/or modify it under the terms of the GNU Library General --
-- Public  License (LGPL)  as published  by the  Free  Software  Foundation; --
-- either version 2 of the license, or (at your option) any later version.   --
-- This  library  is distributed  in the  hope that it will  be  useful, but --
-- WITHOUT ANY WARRANTY without even the implied warranty of MERCHANTABILITY --
-- or FITNESS FOR A PARTICULAR PURPOSE. See Doc/LGPL for more details.       --
-- The license text is also available from:  Free Software Foundation, Inc., --
-- 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA                     --
-------------->  Please email comments to <bug-sather@gnu.org>  <--------------


class RUNES < $TEXT_STRING{RUNE,FRUNES,RUNES}, $IMMUTABLE

class RUNES < $TEXT_STRING{RUNE,FRUNES,RUNES}, $IMMUTABLE is
   -- This class provides a string encoding of arbitrary codes of 32-bits.
   -- It provides most of the operations which are provided in the class STR
   -- in respect of runes which are single encodings.
   --
   -- Storage model (as visible in this class): the octets of the string are
   -- held in the underlying array, and the private list 'indices' holds the
   -- octet offset at which each rune starts, followed by one final entry
   -- giving the offset just past the last rune.

   -- Version 1.4 Apr 99.   Copyright K Hopper, U of Waikato
   --                       Development History
   --                       -------------------
   --        Date           Who By         Detail
   --        ----           ------         ------
   --      11 May 96          kh     Original from Sather STR class
   --       8 Nov 96          kh     Now uses generic base class.
   --       5 Apr 97          kh     Modified for INT to CARD
   --      13 Oct 98          kh     Revised and added pre/post conditions
   --      13 Apr 99          kh     Completely re-written for V8 of text classes

   include TEXT_STRING{RUNE,FRUNES}
         plus -> private raw_plus,
         from_fstr -> private from_fstr,
         set! -> ;                     -- NOT for runes!

   private shared width : CARD := 4 ;  -- merely to keep strings happy!

   private attr indices : FLIST{CARD} ;
      -- NOTE The usage of the indices array in this class is different
      --      from that in the class FRUNES.  The reason for this is
      --      that the latter has a feature 'loc' which indicates the next
      --      element to be filled.  The last element of the index
      --      array serves that purpose in this class.


   private create(
                  size : CARD,          -- IN OCTETS!!!!!!!!!!!!!!
                  lib : LIBCHARS
                  ) : SAME is
      -- This routine returns an empty rune string with room for 'size'
      -- octets.  The index list starts with the single entry 0, i.e. "the
      -- next rune goes at octet offset zero".

      me : SAME := new(size) ;
      me.indices := FLIST{CARD}::create.push(0) ;
      me.priv_lib := REP_LIB_LIST::index(lib) ;
      return me
   end ;


   create(
          sz : CARD
          ) : SAME is
      -- This routine is the version of creation which assumes the default
      -- cultural repertoire and environment and that the size is in octets.

      return create(sz,LIBCHARS::default)
   end ;


   create(
          lib : LIBCHARS
          ) : SAME is
      -- This routine returns an empty rune string with the indicated encoding
      -- and repertoire.

      return create(0,lib)
   end ;


   create : SAME is
      -- This routine returns an empty rune string in the default repertoire.

      return create(0,LIBCHARS::default)
   end ;


   create_from_ucs2(
                    str : BINSTR,
                    lib : LIBCHARS
                    ) : SAME
      pre ~void(lib)
          and (lib.culture.kind = CODE_KINDS::UCS2)
          and (str.size > 0)
          and ((str.size % 2) = 0)     -- must be even!
      post ~void(result)
   is
      -- This routine builds the runes from the binary string indicated,
      -- assuming that the encoding is UCS2 - not UCS4.

      return build(str.cursor,lib)
   end ;


   create(
          rune : RUNE
          ) : SAME is
      -- This routine creates and then returns a single element rune
      -- string, using the library of the rune given.

      me : SAME := create(0,rune.lib) ;
      me := me + rune ;
      return me
   end ;


   create(
          ch_code : CHAR_CODE
          ) : SAME is
      -- This routine creates a new single code rune from the given character
      -- code and then returns it.

      return create(ch_code.rune)
   end ;


   create(
          ch : CHAR,
          lib : LIBCHARS
          ) : SAME
      pre ~void(lib)
      post true
   is
      -- This routine converts a single character - presumed to be in the
      -- given repertoire and encoding - into a string of runes.
      --
      -- The conditions used to refer to 'str', which is not a parameter of
      -- this routine (inside the class it resolves to the routine 'str' of
      -- self).  They now constrain the actual argument.  No size is asserted
      -- for the result since that is determined by CODE_CONVERTER.

      return CODE_CONVERTER::runes(lib,ch)
   end ;


   create(
          str : STR
          ) : SAME
      pre ~void(str)
      post (result.size = str.size)
   is
      -- This routine converts a string of the default character into
      -- a string of runes, one rune per character code of str.  The buffer
      -- is sized for str.size codes of the width used by str's library.

      res : SAME := create(str.size * str.index_lib.my_size,str.index_lib) ;
      index : CARD := 0 ;

      loop
         loc_code : CHAR_CODE := str.code! ;
         res.aset(index,loc_code.rune) ;   -- appends: index is always the end
         index := index + 1
      end ;

      return res
   end ;


   from_frunes(
               fstr : FRUNES
               ) : SAME
      pre ~void(fstr)
      post (result.size = fstr.size)
   is
      -- This routine converts the given fast rune string into the normal
      -- string form (which has immutable semantics).  The 'generic' from_fstr
      -- routine is used to avoid renaming problems for other uses.

      return from_fstr(fstr)
   end ;


   cursor : RUNES_CURSOR
      pre ~void(self)
      post ~void(result)
   is
      -- This routine returns a cursor object corresponding to the contents of
      -- self.

      return RUNES_CURSOR::create(self)
   end ;


   size : CARD is
      -- This routine returns the number of runes in self, or zero if self
      -- is void.  The index list carries one trailing "end" entry, hence
      -- the subtraction.

      if void(self) then
         return 0
      else
         return indices.size - 1
      end
   end ;


   private buffer_scan
      pre ~void(self)
      post (indices.size = size + 1)
   is
      -- This routine scans the buffer and sets up a new index list from
      -- the actual contents.  Any alteration in contents should be accompanied
      -- within this class of a call to this routine.
      --
      -- A new index entry is started at every code which is not a combining
      -- code; the final entry records the offset one past the last code.

      loc_bin : BINSTR := binstr ;
      loc_index : CARD := 0 ;
      indices := FLIST{CARD}::create ;          -- a new list!

      loop
         if ~(CHAR_CODE::create(loc_bin.chunk!(
                           index_lib.my_size),index_lib)).is_combining then
            indices := indices.push(loc_index)
         end ;
         loc_index := loc_index + index_lib.my_size
      end ;

      indices := indices.push(loc_index)
   end ;


   plus(
        elem : RUNE
        ) : SAME
      pre void(self)
          or (elem.lib = index_lib)
      post ~void(result)
           and (result.size = initial(size) + 1)
   is
      -- This routine appends the given element to self, returning the
      -- resulting string.
      --
      -- NOTE(review): when self is non-empty it is destroyed after being
      --      copied; callers must use only the returned string afterwards.

      res : SAME ;
      loc_index : CARD := size ;  -- which may, of course, be zero

      if void(self)
            or (asize = 0) then
         res := create(elem.asize,elem.lib)
      else
         res := new(asize + elem.asize) ;
         res.indices := FLIST{CARD}::create ;
         res.priv_lib := priv_lib ;

         loop                      -- copy the complete index list of self
            res.indices := res.indices.push(indices.elt!)
         end ;

         res.acopy(self) ;
         SYS::destroy(self)    -- old one shouldn't be used now.
      end ;

      res.aset(loc_index,elem) ;   -- loc_index is the end, so this appends
      return res
   end ;


   plus(
        fstr : FRUNES
        ) : SAME
      pre (index_lib = fstr.index_lib)
      post ~void(result)
           and ((void(self)
                 and (result.size = fstr.size))
              or (result.size = initial(size) + fstr.size))
   is
      -- This routine appends the rune string fstr to self and returns it.
      --
      -- The index list is rebuilt for the NEW string: self is unchanged by
      -- the concatenation, so rescanning self (as was done previously) left
      -- the result without a valid index list.

      res : SAME := raw_plus(fstr) ;

      if ~void(res) then
         res.buffer_scan
      end ;

      return res
   end ;


   plus(
        str : SAME
        ) : SAME
      pre ~void(self)
          and ~void(str)
          and (priv_lib = str.priv_lib)
      post ~void(result)
           and ((void(self)
                 and (result.size = str.size))
              or (result.size = initial(size) + str.size))
   is
      -- This routine appends the string str to self and returns the resulting
      -- string.  As for the FRUNES version, the index list of the result (not
      -- of self) is rebuilt from its contents.

      res : SAME := raw_plus(str) ;

      if ~void(res) then
         res.buffer_scan
      end ;

      return res
   end ;


   private store_index(
                       elem_index : CARD
                       ) : CARD
      pre (elem_index < indices.size)
      post result = indices[elem_index]
   is
      -- This routine returns the store index corresponding to the given
      -- element_index (for use where they may be different).

      return indices[elem_index]
   end ;


   aget(
        index : CARD
        ) : RUNE
      pre ~void(self)
          and (index < indices.size - 1)
      post true
   is
      -- This routine is the 'array' indexing facility for runes in a string
      -- of runes, returning the rune indexed.  The octets from the start
      -- offset of the rune up to (not including) the start of the next one
      -- are gathered and a rune is built from them.

      loc_res : BINSTR := BINSTR::create ;

      loop
         loc_index : CARD := indices[index].upto!(indices[index + 1] - 1) ;
         loc_res := loc_res + oct_aget(loc_index)
      end ;

      return RUNE::build(loc_res.cursor,index_lib)
   end ;


   aset(
        index : CARD,
        elem : RUNE
        )
      pre ~void(self)
          and ((index < (indices.size - 1))
               and ((indices[index + 1] - indices[index]) = elem.size)
            or ((index = (indices.size - 1))
               and ((indices[index] + elem.size) <= asize)))
      post true
   is
      -- This routine is the 'array' indexing facility for a rune string which
      -- is only applicable if it is known that either the element to be set is at
      -- the end of the current contents and there is space for the element or that
      -- the size of the element to be inserted is identical to the size of that
      -- being replaced.
      --
      -- NOTE(review): the precondition measures the element with elem.size
      --      whereas the body advances the index list by elem.asize; confirm
      --      that both are octet counts for RUNE.

      if index = (indices.size - 1) then   -- appending: extend the index list
         indices := indices.push(indices[index] + elem.asize)
      end ;

      loop                                 -- ends when elem.aelt! is exhausted
         loc_index : CARD := indices[index].up! ;
         oct_aset(loc_index,elem.aelt!)
      end
   end ;


   vset(
        index : CARD,
        elem : RUNE
        ) : SAME
      pre ~void(self)
          and (index <= (indices.size - 1))
      post true
   is
      -- This routine is the 'array' indexing facility for the case where
      -- the number of codes in elem is different from the number currently at that
      -- index position in the string.  A new rune string has to be produced if the
      -- resulting length is different from that currently allocated.
      --
      -- NOTE(review): in the second branch 'index - 1' underflows CARD when
      --      index is zero, and the head/tail counts look one out if head(n)
      --      and tail(n) mean "first n"/"last n" elements - confirm against
      --      the TEXT_STRING definitions before relying on this branch.

      if index = (indices.size - 1)
            and (indices[index] + elem.size) < asize then
         aset(index,elem) ;
         return self
      else
         return head(index - 1) + elem + tail((indices.size - 1) - index)
      end
   end ;


   rune(
        index : CARD
        ) : RUNE
      pre (index < size)
      post result = [index]
   is
      -- This routine returns the value of the rune at the given index.
      -- The index is a rune index, so it is bounded by size (the number of
      -- runes) and not by the octet count of the buffer.

      return [index]
   end ;


   char(
        index : CARD
        ) : RUNE
      pre (index < size)
      post result = [index]
   is
      -- This routine is a synonym for rune above.  It is included to match
      -- the required abstract interface definition.

      return [index]
   end ;


   binstr : BINSTR
      pre ~void(self)
      post ~void(result)
   is
      -- This routine just returns a copy of the octets of self.  It is
      -- provided for cases where it is necessary to put a text string into a
      -- binary stream of some kind.

      res : FBINSTR := FBINSTR::create ;

      loop
         res := res + aelt!
      end ;

      return res.binstr
   end ;


   ucs2 : BINSTR
      pre ~void(self)
      post void(result)
           or (result.size = 2 * size)    -- two octets per code
   is
      -- This routine returns a copy of self which is the UCS2 binary form.
      -- It is provided for cases where it is necessary to put the string into that
      -- form.  No count is included.  If it is not possible to represent the rune
      -- string in UCS2 coding then void is returned.

      res : BINSTR := BINSTR::create ;

      loop
         ucs4_code : CARD := code!.card ;

         if ucs4_code > HEXTET::Hextet_Max then   -- will not fit in 16 bits
            return void
         end ;

         res := res + HEXTET::create(ucs4_code).binstr
      end ;

      return res
   end ;


   convert(
           lib : LIBCHARS
           ) : SAME
      pre ~void(self)
          and ~void(lib)
      post true
   is
      -- This routine converts self to be in the given encoding and
      -- repertoire.  If any character has no corresponding code then void is
      -- returned.

      return CODE_CONVERTER::runes(lib,self)
   end ;


   private do_replace(
                      old_ch,
                      new_ch : RUNE
                      ) : SAME is
      -- This routine returns a copy of self which has had every occurrence of
      -- old_ch replaced by new_ch.

      loc_res : CODE_STR := CODE_STR::create(index_lib) ;

      loop
         ch : RUNE := elt! ;

         if ch = old_ch then
            ch := new_ch
         end ;

         loc_res := loc_res + ch.code
      end ;

      return loc_res.tgt_runes
   end ;


   replace(
           set : SAME,
           new_ch : RUNE
           ) : SAME
      pre ~void(set)
          and ~void(self)
          and (set.priv_lib = priv_lib)
          and (new_ch.lib = index_lib)
      post (result.size = self.size)    -- and replacement done!
   is
      -- This routine returns a copy of self in which all occurrences of
      -- characters in set are replaced by new_ch.

      loc_res : CODE_STR := CODE_STR::create(index_lib) ;

      loop
         ch : RUNE := elt! ;

         if set.contains(ch) then
            ch := new_ch
         end ;

         loc_res := loc_res + ch.code
      end ;

      return loc_res.tgt_runes
   end ;


   escape(
          esc : RUNE,
          elist : SAME
          ) : SAME
      pre ~void(self)
          and ~void(elist)
          and (priv_lib = elist.priv_lib)
          and (esc.lib = index_lib)
      post (result.contains(esc)
            or (result = self))
   is
      -- This routine returns a copy of self in which all characters occurring
      -- in elist and the character esc itself are preceded by the escape character.
      -- The copy is assembled in a fast string and then converted.

      buf : FRUNES := FRUNES::create(asize/index_lib.my_size) ;

      loop
         loc_ch : RUNE := elt! ;

         if elist.contains(loc_ch)
               or (loc_ch = esc) then
            buf := buf + esc
         end ;

         buf := buf + loc_ch
      end ;

      return from_frunes(buf)
   end ;


   set!(
        rn : RUNE
        )
      pre ~void(self)
          and (rn.lib = index_lib)
      post true
   is
      -- This iter sets successive elements of self to the given value.
      -- Octets of rn are written at the running octet offset for as long as
      -- the next rune still fits; the index list is rebuilt when the iter
      -- finishes.

      loc_oct_index : CARD := 0 ;

      loop
         loc_size : CARD := rn.num_codes * index_lib.my_size ;

         if loc_size + loc_oct_index < asize then

            loop
               aset!(loc_oct_index,rn.aelt!)
            end ;

            loc_oct_index := loc_oct_index + loc_size ;
            yield
         else
            break!
         end
      end ;

      buffer_scan
   end ;


   split!(
          once rn : RUNE
          ) : RUNES
      pre ~void(self)
          and (rn.lib = index_lib)
      post (result.size >= 0)  -- may be zero if two adjacent rn runes found.
   is
      -- This iter yields successive substrings of self which are separated
      -- by the single rune rn.  The separating runes are omitted and the string
      -- yielded is from that after the previous separating character up to and
      -- NOT containing the next (or the end of self if not found)

      curr_loc : CARD := 0 ;                -- Start of next string

      loop
         next_loc : CARD := search(rn,curr_loc) ;

         if next_loc /= CARD::nil then      -- The character was found
            yield substring(curr_loc,next_loc - curr_loc) ;
            curr_loc := next_loc + 1
         else                               -- not found so use rest of string
            yield substring(curr_loc,size - curr_loc) ;
            quit
         end
      end
   end ;


   separate!(
             rns : SAME
             ) : SAME
      pre ~void(rns)
          and ~void(self)
          and (priv_lib = rns.priv_lib)
      post (result = rns)
           or (result = self + rns)
   is
      -- On the first iteration just outputs rns, on successive iterations it
      -- outputs self followed by rns.  Useful for forming lists, eg
      --
      --    loop
      --       #OUT + comma.separate!(a.elt!)
      --    end ;

      yield rns ;

      loop
         yield self + rns
      end
   end ;


   text_str(
            lib : LIBCHARS
            ) : STR
      pre ~void(self)
          and ~void(lib)
      post result.size > 0
   is
      -- This routine returns a string representation of self in the given
      -- repertoire and encoding.  The codes of each rune are written as hex
      -- numbers separated by the library's space, and every rune is followed
      -- by the library's comma character.

      res : STR := STR::create ;

      loop
         loc_rune : RUNE := elt! ;

         loop
            res := res + lib.Space.str.separate!(loc_rune.code!.card.hex_str)
         end ;

         res := res + lib.Comma.char
      end ;

      return res
   end ;


   text_str : STR
      pre ~void(self)
      post result.size > 0
   is
      -- This routine returns the same representation as text_str(lib), using
      -- the default repertoire and encoding.

      return text_str(LIBCHARS::default)
   end ;


   str(
       lib : LIBCHARS
       ) : STR
      pre ~void(self)
          and ~void(lib)
      post result.size > 0
   is
      -- This routine creates a new string which is a literal copy of the
      -- codes of self, interpreted as being in the given encoding and repertoire.
      -- Elements which are void as characters or whose code is not valid in
      -- the character map of lib are omitted.
      --
      -- NOTE(review): because elements may be omitted (and self may be
      --      empty) the postcondition can fail for legitimate input.

      res : STR := STR::create(lib) ;

      loop
         loc_elem : CHAR := elt!.char ;

         if ~void(loc_elem)
               and lib.culture.charmap.valid(loc_elem.code) then
            res := res + loc_elem
         end
      end ;

      return res
   end ;


   str : STR
      pre ~void(self)
      post (result.size = size)
   is
      -- This routine creates a string in the default encoding and repertoire
      -- from the runes of self.
      --
      -- NOTE This routine does NOT convert any encoding.  The existing encoding is
      --      interpreted in the default environment.  Any invalid codes are
      --      silently omitted.
      --
      -- NOTE(review): since invalid codes are omitted the postcondition can
      --      fail when self contains such a code.

      loc_lib : LIBCHARS := LIBCHARS::default ;
      res : STR := STR::create(loc_lib) ;

      loop
         loc_elem : CHAR := elt!.char ;

         if ~void(loc_elem)
               and loc_lib.culture.charmap.valid(loc_elem.code) then
            res := res + loc_elem
         end
      end ;

      return res
   end ;

end ; -- RUNES