Audacious $Id: Doxyfile 4280 2007-03-21 04:39:00Z nenolod $
audstrings.c
Go to the documentation of this file.
00001 /*  Audacious
00002  *  Copyright (C) 2005-2009  Audacious development team.
00003  *
00004  *  BMP - Cross-platform multimedia player
00005  *  Copyright (C) 2003-2004  BMP development team.
00006  *
00007  *  Based on XMMS:
00008  *  Copyright (C) 1998-2003  XMMS development team.
00009  *
00010  *  This program is free software; you can redistribute it and/or modify
00011  *  it under the terms of the GNU General Public License as published by
00012  *  the Free Software Foundation; under version 3 of the License.
00013  *
00014  *  This program is distributed in the hope that it will be useful,
00015  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00016  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00017  *  GNU General Public License for more details.
00018  *
00019  *  You should have received a copy of the GNU General Public License
00020  *  along with this program.  If not, see <http://www.gnu.org/licenses>.
00021  *
00022  *  The Audacious team does not consider modular code linking to
00023  *  Audacious or using our public API to be a derived work.
00024  */
00025 
00026 #ifdef HAVE_CONFIG_H
00027 #  include "config.h"
00028 #endif
00029 
00030 #include "audstrings.h"
00031 
00032 #include <stdio.h>
00033 #include <glib.h>
00034 #include <audacious/i18n.h>
00035 #include <string.h>
00036 #include <ctype.h>
00037 
00044 static gchar *
00045 str_replace_drive_letter(gchar * str)
00046 {
00047     gchar *match, *match_end;
00048 
00049     g_return_val_if_fail(str != NULL, NULL);
00050 
00051     while ((match = strstr(str, ":\\")) != NULL) {
00052         match--;
00053         match_end = match + 3;
00054         *match++ = '/';
00055         while (*match_end)
00056             *match++ = *match_end++;
00057         *match = 0; /* the end of line */
00058     }
00059 
00060     return str;
00061 }
00062 
00063 gchar *
00064 str_append(gchar * str, const gchar * add_str)
00065 {
00066     return str_replace(str, g_strconcat(str, add_str, NULL));
00067 }
00068 
00069 gchar *
00070 str_replace(gchar * str, gchar * new_str)
00071 {
00072     g_free(str);
00073     return new_str;
00074 }
00075 
00076 void
00077 str_replace_in(gchar ** str, gchar * new_str)
00078 {
00079     *str = str_replace(*str, new_str);
00080 }
00081 
00082 gboolean
00083 str_has_prefix_nocase(const gchar * str, const gchar * prefix)
00084 {
00085     /* strncasecmp causes segfaults when str is NULL*/
00086     return (str != NULL && (strncasecmp(str, prefix, strlen(prefix)) == 0));
00087 }
00088 
00089 gboolean str_has_suffix_nocase (const gchar * str, const gchar * suffix)
00090 {
00091     return (str && strlen (str) >= strlen (suffix) && ! strcasecmp (str + strlen
00092      (str) - strlen (suffix), suffix));
00093 }
00094 
00095 gboolean
00096 str_has_suffixes_nocase(const gchar * str, gchar * const *suffixes)
00097 {
00098     gchar *const *suffix;
00099 
00100     g_return_val_if_fail(str != NULL, FALSE);
00101     g_return_val_if_fail(suffixes != NULL, FALSE);
00102 
00103     for (suffix = suffixes; *suffix; suffix++)
00104         if (str_has_suffix_nocase(str, *suffix))
00105             return TRUE;
00106 
00107     return FALSE;
00108 }
00109 
00110 static gchar * (* str_to_utf8_impl) (const gchar *) = NULL;
00111 static gchar * (* str_to_utf8_full_impl) (const gchar *, gssize, gsize *,
00112  gsize *, GError * *) = NULL;
00113 
00114 void str_set_utf8_impl (gchar * (* stu_impl) (const gchar *),
00115  gchar * (* stuf_impl) (const gchar *, gssize, gsize *, gsize *, GError * *))
00116 {
00117     str_to_utf8_impl = stu_impl;
00118     str_to_utf8_full_impl = stuf_impl;
00119 }
00120 
00128 gchar * str_to_utf8 (const gchar * str)
00129 {
00130     g_return_val_if_fail (str_to_utf8_impl, NULL);
00131     return str_to_utf8_impl (str);
00132 }
00133 
00134 gchar * str_to_utf8_full (const gchar * str, gssize len, gsize * bytes_read,
00135  gsize * bytes_written, GError * * err)
00136 {
00137     g_return_val_if_fail (str_to_utf8_full_impl, NULL);
00138     return str_to_utf8_full_impl (str, len, bytes_read, bytes_written, err);
00139 }
00140 
00141 #ifdef HAVE_EXECINFO_H
00142 # include <execinfo.h>
00143 #endif
00144 
00159 gchar *
00160 str_assert_utf8(const gchar * str)
00161 {
00162     /* NULL in NULL out */
00163     if (str == NULL)
00164         return NULL;
00165 
00166     /* already UTF-8? */
00167     if (!g_utf8_validate(str, -1, NULL)) {
00168 #if defined(HAVE_EXECINFO_H) && defined(HAVE_BACKTRACE)
00169         gint i, nsymbols;
00170         const gint nsymmax = 50;
00171         void *addrbuf[nsymmax];
00172         gchar **symbols;
00173         nsymbols = backtrace(addrbuf, nsymmax);
00174         symbols = backtrace_symbols(addrbuf, nsymbols);
00175 
00176         fprintf(stderr, "String '%s' was not UTF-8! Backtrace (%d):\n", str, nsymbols);
00177 
00178         for (i = 0; i < nsymbols; i++)
00179             fprintf(stderr, "  #%d: %s\n", i, symbols[i]);
00180 
00181         free(symbols);
00182 #else
00183         g_warning("String '%s' was not UTF-8!", str);
00184 #endif
00185         return str_to_utf8(str);
00186     } else
00187         return g_strdup(str);
00188 }
00189 
00190 
00191 const gchar *
00192 str_skip_chars(const gchar * str, const gchar * chars)
00193 {
00194     while (strchr(chars, *str) != NULL)
00195         str++;
00196     return str;
00197 }
00198 
00199 gchar *
00200 convert_dos_path(gchar * path)
00201 {
00202     g_return_val_if_fail(path != NULL, NULL);
00203 
00204     /* replace drive letter with '/' */
00205     str_replace_drive_letter(path);
00206 
00207     /* replace '\' with '/' */
00208     string_replace_char (path, '\\', '/');
00209 
00210     return path;
00211 }
00212 
00225 gchar *
00226 filename_get_subtune(const gchar * filename, gint * track)
00227 {
00228     gchar *pos;
00229 
00230     if ((pos = strrchr(filename, '?')) != NULL)
00231     {
00232         const gchar *s = pos + 1;
00233         while (*s != '\0' && g_ascii_isdigit(*s)) s++;
00234         if (*s == '\0') {
00235             if (track != NULL)
00236                 *track = atoi(pos + 1);
00237             return pos;
00238         }
00239     }
00240 
00241     return NULL;
00242 }
00243 
00256 gchar *
00257 filename_split_subtune(const gchar * filename, gint * track)
00258 {
00259     gchar *result;
00260     gchar *pos;
00261 
00262     g_return_val_if_fail(filename != NULL, NULL);
00263 
00264     result = g_strdup(filename);
00265     g_return_val_if_fail(result != NULL, NULL);
00266 
00267     if ((pos = filename_get_subtune(result, track)) != NULL)
00268         *pos = '\0';
00269 
00270     return result;
00271 }
00272 
00273 void string_replace_char (gchar * string, gchar old_str, gchar new_str)
00274 {
00275     while ((string = strchr (string, old_str)) != NULL)
00276         * string = new_str;
00277 }
00278 
00279 static inline gchar get_hex_digit (const gchar * * get)
00280 {
00281     gchar c = * * get;
00282 
00283     if (! c)
00284         return 0;
00285 
00286     (* get) ++;
00287 
00288     if (c < 'A')
00289         return c - '0';
00290     if (c < 'a')
00291         return c - 'A' + 10;
00292 
00293     return c - 'a' + 10;
00294 }
00295 
00296 /* Requires that the destination be large enough to hold the decoded string.
00297  * The source and destination may be the same string.  USE EXTREME CAUTION. */
00298 
00299 static void string_decode_percent_2 (const gchar * from, gchar * to)
00300 {
00301     gchar c;
00302     while ((c = * from ++))
00303         * to ++ = (c != '%') ? c : ((get_hex_digit (& from) << 4) | get_hex_digit
00304          (& from));
00305 
00306     * to = 0;
00307 }
00308 
00309 /* Decodes a percent-encoded string in-place. */
00310 
00311 void string_decode_percent (gchar * s)
00312 {
00313     string_decode_percent_2 (s, s);
00314 }
00315 
00316 /* We encode any character except the "unreserved" characters of RFC 3986 and
00317  * (optionally) the forward slash.  On Windows, we also (optionally) do not
00318  * encode the colon. */
00319 static gboolean is_legal_char (gchar c, gboolean is_filename)
00320 {
00321     return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <=
00322      '9') || (strchr ("-_.~", c) != NULL) ||
00323 #ifdef _WIN32
00324      (is_filename && strchr ("/:", c) != NULL);
00325 #else
00326      (is_filename && c == '/');
00327 #endif
00328 }
00329 
00330 static gchar make_hex_digit (gint i)
00331 {
00332     if (i < 10)
00333         return '0' + i;
00334     else
00335         return ('A' - 10) + i;
00336 }
00337 
00338 /* is_filename specifies whether the forward slash should be left intact */
00339 /* returns string allocated with g_malloc */
00340 gchar * string_encode_percent (const gchar * string, gboolean is_filename)
00341 {
00342     gint length = 0;
00343     const gchar * get;
00344     gchar c;
00345     gchar * new, * set;
00346 
00347     for (get = string; (c = * get); get ++)
00348     {
00349         if (is_legal_char (c, is_filename))
00350             length ++;
00351         else
00352             length += 3;
00353     }
00354 
00355     new = g_malloc (length + 1);
00356     set = new;
00357 
00358     for (get = string; (c = * get); get ++)
00359     {
00360         if (is_legal_char (c, is_filename))
00361             * set ++ = c;
00362         else
00363         {
00364             * set ++ = '%';
00365             * set ++ = make_hex_digit (((guchar) c) >> 4);
00366             * set ++ = make_hex_digit (c & 0xF);
00367         }
00368     }
00369 
00370     * set = 0;
00371     return new;
00372 }
00373 
00374 /* Determines whether a URI is valid UTF-8.  If not and <warn> is nonzero,
00375  * prints a warning to stderr. */
00376 
00377 gboolean uri_is_utf8 (const gchar * uri, gboolean warn)
00378 {
00379     gchar buf[strlen (uri) + 1];
00380     string_decode_percent_2 (uri, buf);
00381 
00382     if (g_utf8_validate (buf, -1, NULL))
00383         return TRUE;
00384 
00385     if (warn)
00386         fprintf (stderr, "URI is not UTF-8: %s.\n", buf);
00387 
00388     return FALSE;
00389 }
00390 
00391 /* Converts a URI to UTF-8 encoding.  The returned URI must be freed with g_free.
00392  *
00393  * Note: The function intentionally converts only URI's that are encoded in the
00394  * system locale and refer to local files.
00395  *
00396  * Rationale:
00397  *
00398  * 1. Local files.  The URI was probably created by percent-encoding a raw
00399  *    filename.
00400  *    a. If that filename was in the system locale, then we can convert the URI
00401  *       to a UTF-8 one, allowing us to display the name correctly and to access
00402  *       the file by converting back to the system locale.
00403  *    b. If that filename was in a different locale (perhaps copied from another
00404  *       machine), then we do not want to convert it to UTF-8 (even assuming we
00405  *       can do so correctly), because we will not know what encoding to convert
00406  *       back to when we want to access the file.
00407  * 2. Remote files.  The URI was probably created by percent-encoding a raw
00408  *    filename in whatever locale the remote system is using.  We do not want
00409  *    to convert it to UTF-8 because we do not know whether the remote system
00410  *    can handle UTF-8 requests. */
00411 
00412 gchar * uri_to_utf8 (const gchar * uri)
00413 {
00414     if (strncmp (uri, "file://", 7))
00415         return g_strdup (uri);
00416 
00417     /* recover the raw filename */
00418     gchar buf[strlen (uri + 7) + 1];
00419     string_decode_percent_2 (uri + 7, buf);
00420 
00421     /* convert it to a URI again, in UTF-8 if possible */
00422     return filename_to_uri (buf);
00423 }
00424 
00425 /* Check that a URI is valid UTF-8.  If not, prints a warning to stderr if
00426  * <warn> is nonzero, frees the old URI with g_free, and sets <uri> to the
00427  * converted URI, which must be freed with g_free when no longer needed. */
00428 
00429 void uri_check_utf8 (gchar * * uri, gboolean warn)
00430 {
00431     if (uri_is_utf8 (* uri, warn))
00432         return;
00433 
00434     gchar * copy = uri_to_utf8 (* uri);
00435     g_free (* uri);
00436     * uri = copy;
00437 }
00438 
00439 /* Like g_filename_to_uri, but converts the filename from the system locale to
00440  * UTF-8 before percent-encoding.  On Windows, replaces '\' with '/' and adds a
00441  * leading '/'. */
00442 
00443 gchar * filename_to_uri (const gchar * name)
00444 {
00445     gchar * utf8 = g_locale_to_utf8 (name, -1, NULL, NULL, NULL);
00446 #ifdef _WIN32
00447     string_replace_char (utf8, '\\', '/');
00448 #endif
00449     gchar * enc = string_encode_percent (utf8 ? utf8 : name, TRUE);
00450     g_free (utf8);
00451 #ifdef _WIN32
00452     gchar * uri = g_strdup_printf ("file:///%s", enc);
00453 #else
00454     gchar * uri = g_strdup_printf ("file://%s", enc);
00455 #endif
00456     g_free (enc);
00457     return uri;
00458 }
00459 
00460 /* Like g_filename_from_uri, but converts the filename from UTF-8 to the system
00461  * locale after percent-decoding.  On Windows, strips the leading '/' and
00462  * replaces '/' with '\'. */
00463 
00464 gchar * uri_to_filename (const gchar * uri)
00465 {
00466 #ifdef _WIN32
00467     g_return_val_if_fail (! strncmp (uri, "file:///", 8), NULL);
00468     gchar buf[strlen (uri + 8) + 1];
00469     string_decode_percent_2 (uri + 8, buf);
00470 #else
00471     g_return_val_if_fail (! strncmp (uri, "file://", 7), NULL);
00472     gchar buf[strlen (uri + 7) + 1];
00473     string_decode_percent_2 (uri + 7, buf);
00474 #endif
00475 #ifdef _WIN32
00476     string_replace_char (buf, '/', '\\');
00477 #endif
00478     gchar * name = g_locale_from_utf8 (buf, -1, NULL, NULL, NULL);
00479     return name ? name : g_strdup (buf);
00480 }
00481 
00482 /* Formats a URI for human-readable display.  Percent-decodes and converts to
00483  * UTF-8 (more aggressively than uri_to_utf8).  For file:// URI's, converts to
00484  * filename format (but in UTF-8). */
00485 
00486 gchar * uri_to_display (const gchar * uri)
00487 {
00488     gchar buf[strlen (uri) + 1];
00489 
00490 #ifdef _WIN32
00491     if (! strncmp (uri, "file:///", 8))
00492     {
00493         string_decode_percent_2 (uri + 8, buf);
00494         string_replace_char (buf, '/', '\\');
00495     }
00496 #else
00497     if (! strncmp (uri, "file://", 7))
00498         string_decode_percent_2 (uri + 7, buf);
00499 #endif
00500     else
00501         string_decode_percent_2 (uri, buf);
00502 
00503     return str_to_utf8 (buf);
00504 }
00505 
00506 gchar * uri_get_extension (const gchar * uri)
00507 {
00508     const gchar * slash = strrchr (uri, '/');
00509     if (! slash)
00510         return NULL;
00511     
00512     gchar * lower = g_ascii_strdown (slash + 1, -1);
00513 
00514     gchar * qmark = strchr (lower, '?');
00515     if (qmark)
00516         * qmark = 0;
00517 
00518     gchar * dot = strrchr (lower, '.');
00519     gchar * ext = dot ? g_strdup (dot + 1) : NULL;
00520     
00521     g_free (lower);
00522     return ext;
00523 }
00524 
00525 void string_cut_extension(gchar *string)
00526 {
00527     gchar *period = strrchr(string, '.');
00528 
00529     if (period != NULL)
00530         *period = 0;
00531 }
00532 
00533 /* Like strcasecmp, but orders numbers correctly (2 before 10). */
00534 /* Non-ASCII characters are treated exactly as is. */
00535 /* Handles NULL gracefully. */
00536 
00537 gint string_compare (const gchar * ap, const gchar * bp)
00538 {
00539     if (ap == NULL)
00540         return (bp == NULL) ? 0 : -1;
00541     if (bp == NULL)
00542         return 1;
00543 
00544     guchar a = * ap ++, b = * bp ++;
00545     for (; a || b; a = * ap ++, b = * bp ++)
00546     {
00547         if (a > '9' || b > '9' || a < '0' || b < '0')
00548         {
00549             if (a <= 'Z' && a >= 'A')
00550                 a += 'a' - 'A';
00551             if (b <= 'Z' && b >= 'A')
00552                 b += 'a' - 'A';
00553 
00554             if (a > b)
00555                 return 1;
00556             if (a < b)
00557                 return -1;
00558         }
00559         else
00560         {
00561             gint x = a - '0';
00562             for (; (a = * ap) <= '9' && a >= '0'; ap ++)
00563                 x = 10 * x + (a - '0');
00564 
00565             gint y = b - '0';
00566             for (; (b = * bp) >= '0' && b <= '9'; bp ++)
00567                 y = 10 * y + (b - '0');
00568 
00569             if (x > y)
00570                 return 1;
00571             if (x < y)
00572                 return -1;
00573         }
00574     }
00575 
00576     return 0;
00577 }
00578 
00579 /* Decodes percent-encoded strings, then compares then with string_compare. */
00580 
00581 gint string_compare_encoded (const gchar * ap, const gchar * bp)
00582 {
00583     if (ap == NULL)
00584         return (bp == NULL) ? 0 : -1;
00585     if (bp == NULL)
00586         return 1;
00587 
00588     guchar a = * ap ++, b = * bp ++;
00589     for (; a || b; a = * ap ++, b = * bp ++)
00590     {
00591         if (a == '%')
00592             a = (get_hex_digit (& ap) << 4) | get_hex_digit (& ap);
00593         if (b == '%')
00594             b = (get_hex_digit (& bp) << 4) | get_hex_digit (& bp);
00595 
00596         if (a > '9' || b > '9' || a < '0' || b < '0')
00597         {
00598             if (a <= 'Z' && a >= 'A')
00599                 a += 'a' - 'A';
00600             if (b <= 'Z' && b >= 'A')
00601                 b += 'a' - 'A';
00602 
00603             if (a > b)
00604                 return 1;
00605             if (a < b)
00606                 return -1;
00607         }
00608         else
00609         {
00610             gint x = a - '0';
00611             for (; (a = * ap) <= '9' && a >= '0'; ap ++)
00612                 x = 10 * x + (a - '0');
00613 
00614             gint y = b - '0';
00615             for (; (b = * bp) >= '0' && b <= '9'; bp ++)
00616                 y = 10 * y + (b - '0');
00617 
00618             if (x > y)
00619                 return 1;
00620             if (x < y)
00621                 return -1;
00622         }
00623     }
00624 
00625     return 0;
00626 }
00627 
00628 const void * memfind (const void * mem, gint size, const void * token, gint
00629  length)
00630 {
00631     if (! length)
00632         return mem;
00633 
00634     size -= length - 1;
00635 
00636     while (size > 0)
00637     {
00638         const void * maybe = memchr (mem, * (guchar *) token, size);
00639 
00640         if (maybe == NULL)
00641             return NULL;
00642 
00643         if (! memcmp (maybe, token, length))
00644             return maybe;
00645 
00646         size -= (guchar *) maybe + 1 - (guchar *) mem;
00647         mem = (guchar *) maybe + 1;
00648     }
00649 
00650     return NULL;
00651 }
00652 
00653 gchar *
00654 str_replace_fragment(gchar *s, gint size, const gchar *old, const gchar *new)
00655 {
00656     gchar *ptr = s;
00657     gint left = strlen(s);
00658     gint avail = size - (left + 1);
00659     gint oldlen = strlen(old);
00660     gint newlen = strlen(new);
00661     gint diff = newlen - oldlen;
00662 
00663     while (left >= oldlen)
00664     {
00665         if (strncmp(ptr, old, oldlen))
00666         {
00667             left--;
00668             ptr++;
00669             continue;
00670         }
00671 
00672         if (diff > avail)
00673             break;
00674 
00675         if (diff != 0)
00676             memmove(ptr + oldlen + diff, ptr + oldlen, left + 1 - oldlen);
00677 
00678         memcpy(ptr, new, newlen);
00679         ptr += newlen;
00680         left -= oldlen;
00681     }
00682 
00683     return s;
00684 }
00685 
00686 void
00687 string_canonize_case(gchar *str)
00688 {
00689     while (*str)
00690     {
00691         *str = g_ascii_toupper(*str);
00692         str++;
00693     }
00694 }