001    /* URLEncoder.java -- Class to convert strings to a properly encoded URL
002       Copyright (C) 1998, 1999, 2001, 2002, 2003 Free Software Foundation, Inc.
003    
004    This file is part of GNU Classpath.
005    
006    GNU Classpath is free software; you can redistribute it and/or modify
007    it under the terms of the GNU General Public License as published by
008    the Free Software Foundation; either version 2, or (at your option)
009    any later version.
010    
011    GNU Classpath is distributed in the hope that it will be useful, but
012    WITHOUT ANY WARRANTY; without even the implied warranty of
013    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
014    General Public License for more details.
015    
016    You should have received a copy of the GNU General Public License
017    along with GNU Classpath; see the file COPYING.  If not, write to the
018    Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
019    02110-1301 USA.
020    
021    Linking this library statically or dynamically with other modules is
022    making a combined work based on this library.  Thus, the terms and
023    conditions of the GNU General Public License cover the whole
024    combination.
025    
026    As a special exception, the copyright holders of this library give you
027    permission to link this library with independent modules to produce an
028    executable, regardless of the license terms of these independent
029    modules, and to copy and distribute the resulting executable under
030    terms of your choice, provided that you also meet, for each linked
031    independent module, the terms and conditions of the license of that
032    module.  An independent module is a module which is not derived from
033    or based on this library.  If you modify this library, you may extend
034    this exception to your version of the library, but you are not
035    obligated to do so.  If you do not wish to do so, delete this
036    exception statement from your version. */
037    
038    package java.net;
039    
040    import java.io.UnsupportedEncodingException;
041    
042    
043    /*
044     * Written using on-line Java Platform 1.2/1.4 API Specification, as well
045     * as "The Java Class Libraries", 2nd edition (Addison-Wesley, 1998).
046     * Status:  Believed complete and correct.
047     */
048    
/**
 * This utility class contains static methods that convert a
 * string into x-www-form-urlencoded format.  This format replaces
 * certain disallowed characters with encoded equivalents.  The letters
 * 'a' through 'z' and 'A' through 'Z', the digits '0' through '9' and
 * the characters '-', '_', '.' and '*' remain as is, the space
 * character (' ') is replaced with a '+' sign, and all other characters
 * are converted to one or more "%XX" sequences, where XX is the upper
 * case hexadecimal representation of one byte of that character in a
 * certain encoding (by default, the platform encoding, though the
 * standard is "UTF-8").
 * <p>
 * This class is very useful for encoding strings to be sent to CGI scripts.
 *
 * @author Aaron M. Renn (arenn@urbanophile.com)
 * @author Warren Levy (warrenl@cygnus.com)
 * @author Mark Wielaard (mark@klomp.org)
 */
public class URLEncoder
{
  /**
   * Encoding used by the one-argument <code>encode</code> method when the
   * "file.encoding" system property is not set or names an unsupported
   * encoding.  8859_1 is used for compatibility with the same default
   * elsewhere in the library.
   */
  private static final String DEFAULT_ENCODING = "8859_1";

  /**
   * Used to convert to hex.  We don't use Integer.toHexString, since
   * it converts to lower case (and the Sun docs pretty clearly
   * specify upper case here), and because it doesn't provide a
   * leading 0.
   */
  private static final String HEX_DIGITS = "0123456789ABCDEF";

  /**
   * Private constructor that does nothing. Included to avoid a default
   * public constructor being created by the compiler.
   */
  private URLEncoder()
  {
  }

  /**
   * This method translates the passed in string into x-www-form-urlencoded
   * format using the encoding named by the "file.encoding" system
   * property.  If that property is not set, or names an encoding that is
   * not supported, "8859_1" is used instead.  The standard encoding is
   * "UTF-8", and the two-argument form of this method should be used
   * instead.
   *
   * @param s The String to convert
   *
   * @return The converted String
   *
   * @deprecated Use {@link #encode(String, String)} and name the encoding
   * explicitly.
   */
  public static String encode(String s)
  {
    try
      {
        return encode(s, System.getProperty("file.encoding",
                                            DEFAULT_ENCODING));
      }
    catch (UnsupportedEncodingException ignored)
      {
        // The configured platform encoding is not usable.  Fall through
        // and retry with the library default rather than handing back
        // an unencoded string.
      }

    try
      {
        return encode(s, DEFAULT_ENCODING);
      }
    catch (UnsupportedEncodingException ignored)
      {
        // Should never happen since the default should always be
        // supported.  Returning the input unchanged is kept as a last
        // resort so that this method still never throws for this reason.
        return s;
      }
  }

  /**
   * This method translates the passed in string into x-www-form-urlencoded
   * format using the character encoding to hex-encode the unsafe characters.
   * <p>
   * The encoding name is checked before any conversion takes place, so an
   * unsupported encoding is reported even when the string contains no
   * characters that need to be hex-encoded.
   *
   * @param s The String to convert
   * @param encoding The encoding to use for unsafe characters
   *
   * @return The converted String
   *
   * @exception UnsupportedEncodingException If the named encoding is not
   * supported
   * @exception NullPointerException If <code>s</code> is null
   *
   * @since 1.4
   */
  public static String encode(String s, String encoding)
    throws UnsupportedEncodingException
  {
    // Dereference s first so that a null string is reported before the
    // encoding is looked at.
    int length = s.length();

    // Validate the encoding with the same lookup that is used for the
    // real conversion below.  Without this an unsupported encoding would
    // go unnoticed for strings made up of safe characters and spaces only.
    "".getBytes(encoding);

    StringBuffer result = new StringBuffer(length);
    int i = 0;
    while (i < length)
      {
        char c = s.charAt(i);
        int start = i;

        if (isSafe(c))
          {
            // A run of safe characters can just be added.
            while (i < length && isSafe(s.charAt(i)))
              {
                i++;
              }
            result.append(s.substring(start, i));
          }
        else if (c == ' ')
          {
            // Replace space char with plus symbol.
            result.append('+');
            i++;
          }
        else
          {
            // Collect the whole run of unsafe characters and convert it
            // in one go, so that characters made up of more than one
            // char (surrogate pairs) are handed to the encoder intact.
            while (i < length && (c = s.charAt(i)) != ' ' && ! isSafe(c))
              {
                i++;
              }

            byte[] bytes = s.substring(start, i).getBytes(encoding);
            for (int j = 0; j < bytes.length; j++)
              {
                appendHex(result, bytes[j]);
              }
          }
      }
    return result.toString();
  }

  /**
   * Appends the "%XY" form of the given byte to the buffer, where X and Y
   * are the upper case hexadecimal digits of the high and low four bits.
   */
  private static void appendHex(StringBuffer out, byte b)
  {
    // The masks discard the sign extension of negative byte values.
    int val = b;
    out.append('%');
    out.append(HEX_DIGITS.charAt((val & 0xf0) >> 4));
    out.append(HEX_DIGITS.charAt(val & 0x0f));
  }

  /**
   * Private static method that returns true if the given char is either
   * an uppercase or lowercase letter from 'a' till 'z', or a digit from
   * '0' till '9', or one of the characters '-', '_', '.' or '*'. Such
   * 'safe' characters don't have to be url encoded.
   */
  private static boolean isSafe(char c)
  {
    return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
           || (c >= '0' && c <= '9') || c == '-' || c == '_' || c == '.'
           || c == '*');
  }
}