D-Bus 1.2.24
|
00001 /* -*- mode: C; c-file-style: "gnu"; indent-tabs-mode: nil; -*- */ 00002 /* dbus-shell.c Shell command line utility functions. 00003 * 00004 * Copyright (C) 2002, 2003 Red Hat, Inc. 00005 * Copyright (C) 2003 CodeFactory AB 00006 * 00007 * Licensed under the Academic Free License version 2.1 00008 * 00009 * This program is free software; you can redistribute it and/or modify 00010 * it under the terms of the GNU General Public License as published by 00011 * the Free Software Foundation; either version 2 of the License, or 00012 * (at your option) any later version. 00013 * 00014 * This program is distributed in the hope that it will be useful, 00015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00017 * GNU General Public License for more details. 00018 * 00019 * You should have received a copy of the GNU General Public License 00020 * along with this program; if not, write to the Free Software 00021 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 00022 * 00023 */ 00024 00025 #include <string.h> 00026 #include "dbus-internals.h" 00027 #include "dbus-list.h" 00028 #include "dbus-memory.h" 00029 #include "dbus-protocol.h" 00030 #include "dbus-shell.h" 00031 #include "dbus-string.h" 00032 00033 /* Single quotes preserve the literal string exactly. escape 00034 * sequences are not allowed; not even \' - if you want a ' 00035 * in the quoted text, you have to do something like 'foo'\''bar' 00036 * 00037 * Double quotes allow $ ` " \ and newline to be escaped with backslash. 00038 * Otherwise double quotes preserve things literally. 00039 */ 00040 00041 static dbus_bool_t 00042 unquote_string_inplace (char* str, char** end) 00043 { 00044 char* dest; 00045 char* s; 00046 char quote_char; 00047 00048 dest = s = str; 00049 00050 quote_char = *s; 00051 00052 if (!(*s == '"' || *s == '\'')) 00053 { 00054 *end = str; 00055 return FALSE; 00056 } 00057 00058 /* Skip the initial quote mark */ 00059 ++s; 00060 00061 if (quote_char == '"') 00062 { 00063 while (*s) 00064 { 00065 _dbus_assert(s > dest); /* loop invariant */ 00066 00067 switch (*s) 00068 { 00069 case '"': 00070 /* End of the string, return now */ 00071 *dest = '\0'; 00072 ++s; 00073 *end = s; 00074 return TRUE; 00075 00076 case '\\': 00077 /* Possible escaped quote or \ */ 00078 ++s; 00079 switch (*s) 00080 { 00081 case '"': 00082 case '\\': 00083 case '`': 00084 case '$': 00085 case '\n': 00086 *dest = *s; 00087 ++s; 00088 ++dest; 00089 break; 00090 00091 default: 00092 /* not an escaped char */ 00093 *dest = '\\'; 00094 ++dest; 00095 /* ++s already done. */ 00096 break; 00097 } 00098 break; 00099 00100 default: 00101 *dest = *s; 00102 ++dest; 00103 ++s; 00104 break; 00105 } 00106 00107 _dbus_assert(s > dest); /* loop invariant */ 00108 } 00109 } 00110 else 00111 { 00112 while (*s) 00113 { 00114 _dbus_assert(s > dest); /* loop invariant */ 00115 00116 if (*s == '\'') 00117 { 00118 /* End of the string, return now */ 00119 *dest = '\0'; 00120 ++s; 00121 *end = s; 00122 return TRUE; 00123 } 00124 else 00125 { 00126 *dest = *s; 00127 ++dest; 00128 ++s; 00129 } 00130 00131 _dbus_assert(s > dest); /* loop invariant */ 00132 } 00133 } 00134 00135 /* If we reach here this means the close quote was never encountered */ 00136 00137 *dest = '\0'; 00138 00139 *end = s; 00140 return FALSE; 00141 } 00142 00167 char* 00168 _dbus_shell_unquote (const char *quoted_string) 00169 { 00170 char *unquoted; 00171 char *end; 00172 char *start; 00173 char *ret; 00174 DBusString retval; 00175 00176 unquoted = _dbus_strdup (quoted_string); 00177 if (unquoted == NULL) 00178 return NULL; 00179 00180 start = unquoted; 00181 end = unquoted; 00182 if (!_dbus_string_init (&retval)) 00183 { 00184 dbus_free (unquoted); 00185 return NULL; 00186 } 00187 00188 /* The loop allows cases such as 00189 * "foo"blah blah'bar'woo foo"baz"la la la\'\''foo' 00190 */ 00191 while (*start) 00192 { 00193 /* Append all non-quoted chars, honoring backslash escape 00194 */ 00195 00196 while (*start && !(*start == '"' || *start == '\'')) 00197 { 00198 if (*start == '\\') 00199 { 00200 /* all characters can get escaped by backslash, 00201 * except newline, which is removed if it follows 00202 * a backslash outside of quotes 00203 */ 00204 00205 ++start; 00206 if (*start) 00207 { 00208 if (*start != '\n') 00209 { 00210 if (!_dbus_string_append_byte (&retval, *start)) 00211 goto error; 00212 } 00213 ++start; 00214 } 00215 } 00216 else 00217 { 00218 if (!_dbus_string_append_byte (&retval, *start)) 00219 goto error; 00220 ++start; 00221 } 00222 } 00223 00224 if (*start) 00225 { 00226 if (!unquote_string_inplace (start, &end)) 00227 goto error; 00228 else 00229 { 00230 if (!_dbus_string_append (&retval, start)) 00231 goto error; 00232 start = end; 00233 } 00234 } 00235 } 00236 00237 ret = _dbus_strdup (_dbus_string_get_data (&retval)); 00238 if (!ret) 00239 goto error; 00240 00241 dbus_free (unquoted); 00242 _dbus_string_free (&retval); 00243 00244 return ret; 00245 00246 error: 00247 dbus_free (unquoted); 00248 _dbus_string_free (&retval); 00249 return NULL; 00250 } 00251 00252 /* _dbus_shell_parse_argv() does a semi-arbitrary weird subset of the way 00253 * the shell parses a command line. We don't do variable expansion, 00254 * don't understand that operators are tokens, don't do tilde expansion, 00255 * don't do command substitution, no arithmetic expansion, IFS gets ignored, 00256 * don't do filename globs, don't remove redirection stuff, etc. 00257 * 00258 * READ THE UNIX98 SPEC on "Shell Command Language" before changing 00259 * the behavior of this code. 00260 * 00261 * Steps to parsing the argv string: 00262 * 00263 * - tokenize the string (but since we ignore operators, 00264 * our tokenization may diverge from what the shell would do) 00265 * note that tokenization ignores the internals of a quoted 00266 * word and it always splits on spaces, not on IFS even 00267 * if we used IFS. We also ignore "end of input indicator" 00268 * (I guess this is control-D?) 00269 * 00270 * Tokenization steps, from UNIX98 with operator stuff removed, 00271 * are: 00272 * 00273 * 1) "If the current character is backslash, single-quote or 00274 * double-quote (\, ' or ") and it is not quoted, it will affect 00275 * quoting for subsequent characters up to the end of the quoted 00276 * text. The rules for quoting are as described in Quoting 00277 * . During token recognition no substitutions will be actually 00278 * performed, and the result token will contain exactly the 00279 * characters that appear in the input (except for newline 00280 * character joining), unmodified, including any embedded or 00281 * enclosing quotes or substitution operators, between the quote 00282 * mark and the end of the quoted text. The token will not be 00283 * delimited by the end of the quoted field." 00284 * 00285 * 2) "If the current character is an unquoted newline character, 00286 * the current token will be delimited." 00287 * 00288 * 3) "If the current character is an unquoted blank character, any 00289 * token containing the previous character is delimited and the 00290 * current character will be discarded." 00291 * 00292 * 4) "If the previous character was part of a word, the current 00293 * character will be appended to that word." 00294 * 00295 * 5) "If the current character is a "#", it and all subsequent 00296 * characters up to, but excluding, the next newline character 00297 * will be discarded as a comment. The newline character that 00298 * ends the line is not considered part of the comment. The 00299 * "#" starts a comment only when it is at the beginning of a 00300 * token. Since the search for the end-of-comment does not 00301 * consider an escaped newline character specially, a comment 00302 * cannot be continued to the next line." 00303 * 00304 * 6) "The current character will be used as the start of a new word." 00305 * 00306 * 00307 * - for each token (word), perform portions of word expansion, namely 00308 * field splitting (using default whitespace IFS) and quote 00309 * removal. Field splitting may increase the number of words. 00310 * Quote removal does not increase the number of words. 00311 * 00312 * "If the complete expansion appropriate for a word results in an 00313 * empty field, that empty field will be deleted from the list of 00314 * fields that form the completely expanded command, unless the 00315 * original word contained single-quote or double-quote characters." 00316 * - UNIX98 spec 00317 * 00318 * 00319 */ 00320 00321 static dbus_bool_t 00322 delimit_token (DBusString *token, 00323 DBusList **retval, 00324 DBusError *error) 00325 { 00326 char *str; 00327 00328 str = _dbus_strdup (_dbus_string_get_data (token)); 00329 if (!str) 00330 { 00331 _DBUS_SET_OOM (error); 00332 return FALSE; 00333 } 00334 00335 if (!_dbus_list_append (retval, str)) 00336 { 00337 dbus_free (str); 00338 _DBUS_SET_OOM (error); 00339 return FALSE; 00340 } 00341 00342 return TRUE; 00343 } 00344 00345 static DBusList* 00346 tokenize_command_line (const char *command_line, DBusError *error) 00347 { 00348 char current_quote; 00349 const char *p; 00350 DBusString current_token; 00351 DBusList *retval = NULL; 00352 dbus_bool_t quoted;; 00353 00354 current_quote = '\0'; 00355 quoted = FALSE; 00356 p = command_line; 00357 00358 if (!_dbus_string_init (¤t_token)) 00359 { 00360 _DBUS_SET_OOM (error); 00361 return NULL; 00362 } 00363 00364 while (*p) 00365 { 00366 if (current_quote == '\\') 00367 { 00368 if (*p == '\n') 00369 { 00370 /* we append nothing; backslash-newline become nothing */ 00371 } 00372 else 00373 { 00374 if (!_dbus_string_append_byte (¤t_token, '\\') || 00375 !_dbus_string_append_byte (¤t_token, *p)) 00376 { 00377 _DBUS_SET_OOM (error); 00378 goto error; 00379 } 00380 } 00381 00382 current_quote = '\0'; 00383 } 00384 else if (current_quote == '#') 00385 { 00386 /* Discard up to and including next newline */ 00387 while (*p && *p != '\n') 00388 ++p; 00389 00390 current_quote = '\0'; 00391 00392 if (*p == '\0') 00393 break; 00394 } 00395 else if (current_quote) 00396 { 00397 if (*p == current_quote && 00398 /* check that it isn't an escaped double quote */ 00399 !(current_quote == '"' && quoted)) 00400 { 00401 /* close the quote */ 00402 current_quote = '\0'; 00403 } 00404 00405 /* Everything inside quotes, and the close quote, 00406 * gets appended literally. 00407 */ 00408 00409 if (!_dbus_string_append_byte (¤t_token, *p)) 00410 { 00411 _DBUS_SET_OOM (error); 00412 goto error; 00413 } 00414 } 00415 else 00416 { 00417 switch (*p) 00418 { 00419 case '\n': 00420 if (!delimit_token (¤t_token, &retval, error)) 00421 goto error; 00422 00423 _dbus_string_free (¤t_token); 00424 00425 if (!_dbus_string_init (¤t_token)) 00426 { 00427 _DBUS_SET_OOM (error); 00428 goto init_error; 00429 } 00430 00431 break; 00432 00433 case ' ': 00434 case '\t': 00435 /* If the current token contains the previous char, delimit 00436 * the current token. A nonzero length 00437 * token should always contain the previous char. 00438 */ 00439 if (_dbus_string_get_length (¤t_token) > 0) 00440 { 00441 if (!delimit_token (¤t_token, &retval, error)) 00442 goto error; 00443 00444 _dbus_string_free (¤t_token); 00445 00446 if (!_dbus_string_init (¤t_token)) 00447 { 00448 _DBUS_SET_OOM (error); 00449 goto init_error; 00450 } 00451 00452 } 00453 00454 /* discard all unquoted blanks (don't add them to a token) */ 00455 break; 00456 00457 00458 /* single/double quotes are appended to the token, 00459 * escapes are maybe appended next time through the loop, 00460 * comment chars are never appended. 00461 */ 00462 00463 case '\'': 00464 case '"': 00465 if (!_dbus_string_append_byte (¤t_token, *p)) 00466 { 00467 _DBUS_SET_OOM (error); 00468 goto error; 00469 } 00470 00471 /* FALL THRU */ 00472 00473 case '#': 00474 case '\\': 00475 current_quote = *p; 00476 break; 00477 00478 default: 00479 /* Combines rules 4) and 6) - if we have a token, append to it, 00480 * otherwise create a new token. 00481 */ 00482 if (!_dbus_string_append_byte (¤t_token, *p)) 00483 { 00484 _DBUS_SET_OOM (error); 00485 goto error; 00486 } 00487 break; 00488 } 00489 } 00490 00491 /* We need to count consecutive backslashes mod 2, 00492 * to detect escaped doublequotes. 00493 */ 00494 if (*p != '\\') 00495 quoted = FALSE; 00496 else 00497 quoted = !quoted; 00498 00499 ++p; 00500 } 00501 00502 if (!delimit_token (¤t_token, &retval, error)) 00503 goto error; 00504 00505 if (current_quote) 00506 { 00507 dbus_set_error_const (error, DBUS_ERROR_INVALID_ARGS, "Unclosed quotes in command line"); 00508 goto error; 00509 } 00510 00511 if (retval == NULL) 00512 { 00513 dbus_set_error_const (error, DBUS_ERROR_INVALID_ARGS, "No tokens found in command line"); 00514 goto error; 00515 } 00516 00517 _dbus_string_free (¤t_token); 00518 00519 return retval; 00520 00521 error: 00522 _dbus_string_free (¤t_token); 00523 00524 init_error: 00525 if (retval) 00526 { 00527 _dbus_list_foreach (&retval, (DBusForeachFunction) dbus_free, NULL); 00528 _dbus_list_clear (&retval); 00529 } 00530 00531 return NULL; 00532 } 00533 00551 dbus_bool_t 00552 _dbus_shell_parse_argv (const char *command_line, 00553 int *argcp, 00554 char ***argvp, 00555 DBusError *error) 00556 { 00557 /* Code based on poptParseArgvString() from libpopt */ 00558 int argc = 0; 00559 char **argv = NULL; 00560 DBusList *tokens = NULL; 00561 int i; 00562 DBusList *tmp_list; 00563 00564 if (!command_line) 00565 { 00566 _dbus_verbose ("Command line is NULL\n"); 00567 return FALSE; 00568 } 00569 00570 tokens = tokenize_command_line (command_line, error); 00571 if (tokens == NULL) 00572 { 00573 _dbus_verbose ("No tokens for command line '%s'\n", command_line); 00574 return FALSE; 00575 } 00576 00577 /* Because we can't have introduced any new blank space into the 00578 * tokens (we didn't do any new expansions), we don't need to 00579 * perform field splitting. If we were going to honor IFS or do any 00580 * expansions, we would have to do field splitting on each word 00581 * here. Also, if we were going to do any expansion we would need to 00582 * remove any zero-length words that didn't contain quotes 00583 * originally; but since there's no expansion we know all words have 00584 * nonzero length, unless they contain quotes. 00585 * 00586 * So, we simply remove quotes, and don't do any field splitting or 00587 * empty word removal, since we know there was no way to introduce 00588 * such things. 00589 */ 00590 00591 argc = _dbus_list_get_length (&tokens); 00592 argv = dbus_new (char *, argc + 1); 00593 if (!argv) 00594 { 00595 _DBUS_SET_OOM (error); 00596 goto error; 00597 } 00598 00599 i = 0; 00600 tmp_list = tokens; 00601 while (tmp_list) 00602 { 00603 argv[i] = _dbus_shell_unquote (tmp_list->data); 00604 00605 if (!argv[i]) 00606 { 00607 int j; 00608 for (j = 0; j < i; j++) 00609 dbus_free(argv[j]); 00610 00611 dbus_free (argv); 00612 _DBUS_SET_OOM (error); 00613 goto error; 00614 } 00615 00616 tmp_list = _dbus_list_get_next_link (&tokens, tmp_list); 00617 ++i; 00618 } 00619 argv[argc] = NULL; 00620 00621 _dbus_list_foreach (&tokens, (DBusForeachFunction) dbus_free, NULL); 00622 _dbus_list_clear (&tokens); 00623 00624 if (argcp) 00625 *argcp = argc; 00626 00627 if (argvp) 00628 *argvp = argv; 00629 else 00630 dbus_free_string_array (argv); 00631 00632 return TRUE; 00633 00634 error: 00635 _dbus_list_foreach (&tokens, (DBusForeachFunction) dbus_free, NULL); 00636 _dbus_list_clear (&tokens); 00637 00638 return FALSE; 00639 00640 }