001/* CollationElementIterator.java -- Walks through collation elements
002   Copyright (C) 1998, 1999, 2001, 2002, 2003, 2004, 2012  Free Software Foundation
003
004This file is part of GNU Classpath.
005
006GNU Classpath is free software; you can redistribute it and/or modify
007it under the terms of the GNU General Public License as published by
008the Free Software Foundation; either version 2, or (at your option)
009any later version.
010
011GNU Classpath is distributed in the hope that it will be useful, but
012WITHOUT ANY WARRANTY; without even the implied warranty of
013MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
014General Public License for more details.
015
016You should have received a copy of the GNU General Public License
017along with GNU Classpath; see the file COPYING.  If not, write to the
018Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
01902110-1301 USA.
020
021Linking this library statically or dynamically with other modules is
022making a combined work based on this library.  Thus, the terms and
023conditions of the GNU General Public License cover the whole
024combination.
025
026As a special exception, the copyright holders of this library give you
027permission to link this library with independent modules to produce an
028executable, regardless of the license terms of these independent
029modules, and to copy and distribute the resulting executable under
030terms of your choice, provided that you also meet, for each linked
031independent module, the terms and conditions of the license of that
032module.  An independent module is a module which is not derived from
033or based on this library.  If you modify this library, you may extend
034this exception to your version of the library, but you are not
035obligated to do so.  If you do not wish to do so, delete this
036exception statement from your version. */
037
038
039package java.text;
040
041import gnu.java.lang.CPStringBuilder;
042
043import java.util.ArrayList;
044
045/* Written using "Java Class Libraries", 2nd edition, plus online
046 * API docs for JDK 1.2 from http://www.javasoft.com.
047 * Status: Believed complete and correct to JDK 1.1.
048 */
049
050/**
051 * This class walks through the character collation elements of a
052 * <code>String</code> as defined by the collation rules in an instance of
053 * <code>RuleBasedCollator</code>.  There is no public constructor for
054 * this class.  An instance is created by calling the
055 * <code>getCollationElementIterator</code> method on
056 * <code>RuleBasedCollator</code>.
057 *
058 * @author Aaron M. Renn (arenn@urbanophile.com)
059 * @author Tom Tromey (tromey@cygnus.com)
060 * @author Guilhem Lavaux (guilhem.lavaux@free.fr)
061 */
062public final class CollationElementIterator
063{
064  /**
065   * This is a constant value that is returned to indicate that the end of
066   * the string was encountered.
067   */
068  public static final int NULLORDER = -1;
069
070  /**
071   * This is the RuleBasedCollator this object was created from.
072   */
073  RuleBasedCollator collator;
074
075  /**
076   * This is the String that is being iterated over.
077   */
078  CharacterIterator text;
079
080  /**
081   * This is the index into the collation decomposition where we are currently scanning.
082   */
083  int index;
084
085  /**
086   * This is the index into the String where we are currently scanning.
087   */
088  int textIndex;
089
090  /**
091   * Array containing the collation decomposition of the
092   * text given to the constructor.
093   */
094  private RuleBasedCollator.CollationElement[] textDecomposition;
095
096  /**
097   * Array containing the index of the specified block.
098   */
099  private int[] textIndexes;
100
101  /**
102   * This method initializes a new instance of <code>CollationElementIterator</code>
103   * to iterate over the specified <code>String</code> using the rules in the
104   * specified <code>RuleBasedCollator</code>.
105   *
106   * @param collator The <code>RuleBasedCollation</code> used for calculating collation values
107   * @param text The <code>String</code> to iterate over.
108   */
109  CollationElementIterator(RuleBasedCollator collator, String text)
110  {
111    this.collator = collator;
112
113    setText (text);
114  }
115
116  /**
117   * This method initializes a new instance of <code>CollationElementIterator</code>
118   * to iterate over the specified <code>String</code> using the rules in the
119   * specified <code>RuleBasedCollator</code>.
120   *
121   * @param collator The <code>RuleBasedCollation</code> used for calculating collation values
122   * @param text The character iterator to iterate over.
123   */
124  CollationElementIterator(RuleBasedCollator collator, CharacterIterator text)
125  {
126    this.collator = collator;
127
128    setText (text);
129  }
130
131  RuleBasedCollator.CollationElement nextBlock()
132  {
133    if (index >= textDecomposition.length)
134      return null;
135
136    RuleBasedCollator.CollationElement e = textDecomposition[index];
137
138    textIndex = textIndexes[index+1];
139
140    index++;
141
142    return e;
143  }
144
145  RuleBasedCollator.CollationElement previousBlock()
146  {
147    if (index == 0)
148      return null;
149
150    index--;
151    RuleBasedCollator.CollationElement e = textDecomposition[index];
152
153    textIndex = textIndexes[index+1];
154
155    return e;
156  }
157
158  /**
159   * This method returns the collation ordering value of the next character sequence
160   * in the string (it may be an extended character following collation rules).
161   * This method will return <code>NULLORDER</code> if the
162   * end of the string was reached.
163   *
164   * @return The collation ordering value.
165   */
166  public int next()
167  {
168    RuleBasedCollator.CollationElement e = nextBlock();
169
170    if (e == null)
171      return NULLORDER;
172
173    return e.getValue();
174  }
175
176  /**
177   * This method returns the collation ordering value of the previous character
178   * in the string.  This method will return <code>NULLORDER</code> if the
179   * beginning of the string was reached.
180   *
181   * @return The collation ordering value.
182   */
183  public int previous()
184  {
185    RuleBasedCollator.CollationElement e = previousBlock();
186
187    if (e == null)
188      return NULLORDER;
189
190    return e.getValue();
191  }
192
193  /**
194   * This method returns the primary order value for the given collation
195   * value.
196   *
197   * @param order The collation value returned from <code>next()</code> or
198   *              <code>previous()</code>.
199   *
200   * @return The primary order value of the specified collation value.  This is
201   *         the high 16 bits.
202   */
203  public static int primaryOrder(int order)
204  {
205    // From the JDK 1.2 spec.
206    return order >>> 16;
207  }
208
209  /**
210   * This method resets the internal position pointer to read from the
211   * beginning of the <code>String</code> again.
212   */
213  public void reset()
214  {
215    index = 0;
216    textIndex = 0;
217  }
218
219  /**
220   * This method returns the secondary order value for the given collation
221   * value.
222   *
223   * @param order The collation value returned from <code>next()</code> or
224   *              <code>previous()</code>.
225   *
226   * @return The secondary order value of the specified collation value.  This
227   *         is the bits 8-15.
228   */
229  public static short secondaryOrder(int order)
230  {
231    // From the JDK 1.2 spec.
232    return (short) ((order >>> 8) & 255);
233  }
234
235  /**
236   * This method returns the tertiary order value for the given collation
237   * value.
238   *
239   * @param order The collation value returned from <code>next()</code> or
240   *              <code>previous()</code>.
241   *
242   * @return The tertiary order value of the specified collation value.  This
243   *         is the low eight bits.
244   */
245  public static short tertiaryOrder(int order)
246  {
247    // From the JDK 1.2 spec.
248    return (short) (order & 255);
249  }
250
251  /**
252   * This method sets the <code>String</code> that it is iterating over
253   * to the specified <code>String</code>.
254   *
255   * @param text The new <code>String</code> to iterate over.
256   *
257   * @since 1.2
258   */
259  public void setText(String text)
260  {
261    int idx = 0;
262    int idx_idx = 0;
263    int alreadyExpanded = 0;
264    int idxToMove = 0;
265
266    this.text = new StringCharacterIterator(text);
267    this.index = 0;
268
269    String work_text = text.intern();
270
271    ArrayList<RuleBasedCollator.CollationElement> aElement = new ArrayList<RuleBasedCollator.CollationElement>();
272    ArrayList<Integer> aIdx = new ArrayList<Integer>();
273
274    // Build element collection ordered as they come in "text".
275    while (idx < work_text.length())
276      {
277        String key, keyOld;
278
279        Object object = null;
280        int p = 1;
281
282        // IMPROVE: use a TreeMap with a prefix-ordering rule.
283        keyOld = key = null;
284        do
285          {
286            if (object != null)
287              keyOld = key;
288            key = work_text.substring (idx, idx+p);
289            object = collator.prefix_tree.get (key);
290            if (object != null && idx < alreadyExpanded)
291              {
292                RuleBasedCollator.CollationElement prefix = (RuleBasedCollator.CollationElement)object;
293                if (prefix.expansion != null &&
294                    prefix.expansion.startsWith(work_text.substring(0, idx)))
295                {
296                  object = null;
297                  key = keyOld;
298                }
299              }
300            p++;
301          }
302        while (idx+p <= work_text.length());
303
304        if (object == null)
305          key = keyOld;
306
307        RuleBasedCollator.CollationElement prefix =
308          (RuleBasedCollator.CollationElement) collator.prefix_tree.get (key);
309
310        /*
311         * First case: There is no such sequence in the database.
312         * We will have to build one from the context.
313         */
314        if (prefix == null)
315          {
316            /*
317             * We are dealing with sequences in an expansion. They
318             * are treated as accented characters (tertiary order).
319             */
320            if (alreadyExpanded > 0)
321              {
322                RuleBasedCollator.CollationElement e =
323                  collator.getDefaultAccentedElement (work_text.charAt (idx));
324
325                aElement.add (e);
326                aIdx.add (Integer.valueOf(idx_idx));
327                idx++;
328                alreadyExpanded--;
329                if (alreadyExpanded == 0)
330                  {
331                    /* There is not any characters left in the expansion set.
332                     * We can increase the pointer in the source string.
333                     */
334                    idx_idx += idxToMove;
335                    idxToMove = 0;
336                  }
337                else
338                  idx_idx++;
339              }
340            else
341              {
342                /* This is a normal character. */
343                RuleBasedCollator.CollationElement e =
344                  collator.getDefaultElement (work_text.charAt (idx));
345                Integer iRef = Integer.valueOf(idx_idx);
346
347                /* Don't forget to mark it as a special sequence so the
348                 * string can be ordered.
349                 */
350                aElement.add (RuleBasedCollator.SPECIAL_UNKNOWN_SEQ);
351                aIdx.add (iRef);
352                aElement.add (e);
353                aIdx.add (iRef);
354                idx_idx++;
355                idx++;
356              }
357            continue;
358          }
359
360        /*
361         * Second case: Here we have found a matching sequence.
362         * Here we have an expansion string prepend it to the "work text" and
363         * add the corresponding sorting element. We must also mark
364         */
365        if (prefix.expansion != null)
366          {
367            work_text = prefix.expansion
368              + work_text.substring (idx+prefix.key.length());
369            idx = 0;
370            aElement.add (prefix);
371            aIdx.add (Integer.valueOf(idx_idx));
372            if (alreadyExpanded == 0)
373              idxToMove = prefix.key.length();
374            alreadyExpanded += prefix.expansion.length()-prefix.key.length();
375          }
376        else
377          {
378            /* Third case: the simplest. We have got the prefix and it
379             * has not to be expanded.
380             */
381            aElement.add (prefix);
382            aIdx.add (Integer.valueOf(idx_idx));
383            idx += prefix.key.length();
384            /* If the sequence is in an expansion, we must decrease the
385             * counter.
386             */
387            if (alreadyExpanded > 0)
388              {
389                alreadyExpanded -= prefix.key.length();
390                if (alreadyExpanded == 0)
391                  {
392                    idx_idx += idxToMove;
393                    idxToMove = 0;
394                  }
395              }
396            else
397              idx_idx += prefix.key.length();
398          }
399      }
400
401    textDecomposition = aElement.toArray(new RuleBasedCollator.CollationElement[aElement.size()]);
402    textIndexes = new int[aIdx.size()+1];
403    for (int i = 0; i < aIdx.size(); i++)
404      {
405        textIndexes[i] = aIdx.get(i).intValue();
406      }
407    textIndexes[aIdx.size()] = text.length();
408  }
409
410  /**
411   * This method sets the <code>String</code> that it is iterating over
412   * to the <code>String</code> represented by the specified
413   * <code>CharacterIterator</code>.
414   *
415   * @param source The <code>CharacterIterator</code> containing the new
416   * <code>String</code> to iterate over.
417   */
418  public void setText(CharacterIterator source)
419  {
420    CPStringBuilder expand = new CPStringBuilder();
421
422    // For now assume we read from the beginning of the string.
423    for (char c = source.first();
424         c != CharacterIterator.DONE;
425         c = source.next())
426      expand.append(c);
427
428    setText(expand.toString());
429  }
430
431  /**
432   * This method returns the current offset into the <code>String</code>
433   * that is being iterated over.
434   *
435   * @return The iteration index position.
436   *
437   * @since 1.2
438   */
439  public int getOffset()
440  {
441    return textIndex;
442  }
443
444  /**
445   * This method sets the iteration index position into the current
446   * <code>String</code> to the specified value.  This value must not
447   * be negative and must not be greater than the last index position
448   * in the <code>String</code>.
449   *
450   * @param offset The new iteration index position.
451   *
452   * @exception IllegalArgumentException If the new offset is not valid.
453   */
454  public void setOffset(int offset)
455  {
456    if (offset < 0)
457      throw new IllegalArgumentException("Negative offset: " + offset);
458
459    if (offset > (text.getEndIndex() - 1))
460      throw new IllegalArgumentException("Offset too large: " + offset);
461
462    for (index = 0; index < textDecomposition.length; index++)
463      {
464        if (offset <= textIndexes[index])
465          break;
466      }
467    /*
468     * As textIndexes[0] == 0, we should not have to take care whether index is
469     * greater than 0. It is always.
470     */
471    if (textIndexes[index] == offset)
472      textIndex = offset;
473    else
474      textIndex = textIndexes[index-1];
475  }
476
477  /**
478   * This method returns the maximum length of any expansion sequence that
479   * ends with the specified collation order value.  (Whatever that means).
480   *
481   * @param value The collation order value
482   *
483   * @return The maximum length of an expansion sequence.
484   */
485  public int getMaxExpansion(int value)
486  {
487    return 1;
488  }
489}