xquery_functions.h
Go to the documentation of this file.
00001 /*
00002  * Copyright 2006-2008 The FLWOR Foundation.
00003  *
00004  * Licensed under the Apache License, Version 2.0 (the "License");
00005  * you may not use this file except in compliance with the License.
00006  * You may obtain a copy of the License at
00007  * 
00008  * http://www.apache.org/licenses/LICENSE-2.0
00009  *
00010  * Unless required by applicable law or agreed to in writing, software
00011  * distributed under the License is distributed on an "AS IS" BASIS,
00012  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00013  * See the License for the specific language governing permissions and
00014  * limitations under the License.
00015  */
00016 
00017 #ifndef ZORBA_XQUERY_FUNCTIONS_API_H
00018 #define ZORBA_XQUERY_FUNCTIONS_API_H
00019 
00020 #include <zorba/config.h>
00021 #include <zorba/internal/unique_ptr.h>
00022 #include <zorba/zorba_string.h>
00023 
00024 namespace zorba {
00025 
00026 ///////////////////////////////////////////////////////////////////////////////
00027 
/**
 * Provides a way for a function to return a sequence of some type \c T that
 * can be iterated over.
 *
 * A %Sequence owns the iterator that produces its elements.  Copy-constructing
 * a %Sequence transfers that ownership to the copy and leaves the source
 * without an iterator; a %Sequence without an iterator behaves as an exhausted
 * sequence.
 *
 * @tparam T The type of the elements of the sequence.
 */
template<typename T>
class Sequence {
public:
  typedef T value_type;

  /**
   * The interface that supplies the elements of a %Sequence.
   */
  struct iterator {
    virtual ~iterator() { }

    /**
     * Gets the next element, if any.
     *
     * @param result A pointer to the variable to receive the next element.
     * @return \c true only if there is a next element.
     */
    virtual bool next( value_type *result ) = 0;
  };

  /**
   * Constructs a new %Sequence.
   * This constructor is intended only for function implementors.
   *
   * @param i The iterator that provides the elements of the sequence.
   * Ownership of the iterator is taken.  If \a i is null, the sequence is
   * empty.
   */
  Sequence( iterator *i ) : i_( i ) { }

  /**
   * Copy constructs a %Sequence.
   *
   * @param s The %Sequence to copy from.  Note that it is a destructive copy
   * in that the iterator of \a s is moved into the new %Sequence; afterwards,
   * \a s yields no more elements.
   */
  Sequence( Sequence const &s ) : i_( std::move( s.i_ ) ) { }

  /**
   * Gets the next element in the sequence.
   *
   * @param result A pointer to the variable to receive the next element.
   * @return \c true only if there is a next element; always \c false if this
   * %Sequence has no iterator (it was copied from or constructed from null).
   */
  bool next( value_type *result ) {
    // i_ is null after a destructive copy: report "exhausted" rather than
    // dereferencing a null pointer.
    return i_.get() && i_->next( result );
  }

private:
  // mutable so the copy constructor can move out of its const source
  mutable std::unique_ptr<iterator> i_;

  // forbid
  Sequence& operator=( Sequence const& );
};
00077 
00078 ///////////////////////////////////////////////////////////////////////////////
00079 
00080 namespace fn {
00081 
00082 ////////// 5.4 Functions on string values /////////////////////////////////////
00083 
00084 /**
00085  * Translates every character of \a arg to its upper-case counterpart as
00086  * defined in the appropriate case mappings section of the Unicode standard.
00087  *
00088  * Every lower-case character that has no upper-case counterpart, as well as
00089  * every character that is already upper-case, is included in the returned
00090  * value in its original form.
00091  *
00092  * @param arg The string to translate.
00093  * @return \a arg translated to upper-case.
00094  */
00095 ZORBA_DLL_PUBLIC
00096 String upper_case( String const &arg );
00097 
00098 /**
00099  * Translates every character of \a arg to its lower-case counterpart as
00100  * defined in the appropriate case mappings section of the Unicode standard.
00101  *
00102  * Every upper-case character that has no lower-case counterpart, as well as
00103  * every character that is already lower-case, is included in the returned
00104  * value in its original form.
00105  *
00106  * @param arg The string to translate.
00107  * @return \a arg translated to lower-case.
00108  */
00109 ZORBA_DLL_PUBLIC
00110 String lower_case( String const &arg );
00111 
00112 ////////// 5.5 Functions based on substring matching //////////////////////////
00113 
00114 /**
00115  * Tests whether or not the value of \a arg1 ends with a sequence of
00116  * collation units that provides a match to the collation units of \a arg2.
00117  *
00118  * @param arg1 The string to test.
00119  * @param arg2 The suffix to look for.
00120  * @return \c true only if \a arg1 ends with \a arg2 or \a arg2 is the
00121  * zero-length string.
00122  */
00123 ZORBA_DLL_PUBLIC
00124 bool ends_with( String const &arg1, String const &arg2 );
00125 
00126 /**
00127  * Tests whether or not the value of \a arg1 ends with a sequence of
00128  * collation units that provides a match to the collation units of \a arg2.
00129  *
00130  * @param arg1 The string to test.
00131  * @param arg2 The suffix to look for, passed as a \c char pointer
00132  * (presumably a null-terminated C string -- confirm with the implementation).
00133  * @return \c true only if \a arg1 ends with \a arg2 or \a arg2 is empty.
00134  */
00135 ZORBA_DLL_PUBLIC
00136 bool ends_with( String const &arg1, char const *arg2 );
00137 
00138 /**
00139  * Tests whether or not the value of \a arg1 starts with a sequence of
00140  * collation units that provides a match to the collation units of \a arg2.
00141  *
00142  * @param arg1 The string to test.
00143  * @param arg2 The prefix to look for.
00144  * @return \c true only if \a arg1 starts with \a arg2 or \a arg2 is the
00145  * zero-length string.
00146  */
00147 ZORBA_DLL_PUBLIC
00148 bool starts_with( String const &arg1, String const &arg2 );
00149 
00150 /**
00151  * Tests whether or not the value of \a arg1 starts with a sequence of
00152  * collation units that provides a match to the collation units of \a arg2.
00153  *
00154  * @param arg1 The string to test.
00155  * @param arg2 The prefix to look for, passed as a \c char pointer
00156  * (presumably a null-terminated C string -- confirm with the implementation).
00157  * @return \c true only if \a arg1 starts with \a arg2 or \a arg2 is empty.
00158  */
00159 ZORBA_DLL_PUBLIC
00160 bool starts_with( String const &arg1, char const *arg2 );
00161 
00162 //////// 6 Functions that manipulate URIs /////////////////////////////////////
00163 
00164 /**
00165  * Encodes reserved characters in an xs:string that is intended to be used in
00166  * the path segment of a URI.  It is invertible but not idempotent.
00167  *
00168  * This function applies the URI escaping rules defined in section 2 of [RFC
00169  * 3986] to the xs:string supplied as \a uri_part.  The effect of the function
00170  * is to escape reserved characters: each such character in the string is
00171  * replaced with its percent-encoded form as described in [RFC 3986].
00172  *
00173  * All characters are escaped except those identified as "unreserved" by [RFC
00174  * 3986], that is, the upper- and lower-case letters A-Z, the digits 0-9,
00175  * HYPHEN-MINUS ("-"), LOW LINE ("_"), FULL STOP ("."), and TILDE ("~").
00176  *
00177  * @param uri_part The part of a URI (e.g., a path segment) to be encoded.
00178  * @return \a uri_part with all but the unreserved characters percent-encoded.
00179  */
00180 ZORBA_DLL_PUBLIC
00181 String encode_for_uri( String const &uri_part );
00182 
00183 ////////// 7.6 String Functions that Use Pattern Matching /////////////////////
00184 
00185 /**
00186  * Breaks the \a input string into a sequence of strings, treating any
00187  * substring that matches \a pattern as a separator.  The separators
00188  * themselves are not returned.
00189  *
00190  * Performance note: if \a pattern is a simple string (not a regular expression
00191  * with meta-characters), it is more efficient to use String::find().
00192  *
00193  * @param input The string to be split into tokens.  If \a input is the
00194  * zero-length string, the result is the empty sequence.
00195  * @param pattern The regular expression, as a \c char pointer.  If it matches a
00196  * zero-length string, then an error is raised: [err:FORX0003].
00197  * @param flags The regular expression flags, if any; the default is the empty
00198  * string, i.e., no flags.
00199  * @return a sequence of strings for the tokens.
00200  */
00201 ZORBA_DLL_PUBLIC
00202 Sequence<String> tokenize( String const &input, char const *pattern,
00203                            char const *flags = "" );
00204 
00205 /**
00206  * This function breaks the \a input string into a sequence of strings,
00207  * treating any substring that matches \a pattern as a separator.  The
00208  * separators themselves are not returned.
00209  *
00210  * Performance note: if \a pattern is a simple string (not a regular expression
00211  * with meta-characers), it is more efficient to use String::find().
00212  *
00213  * @param input The string to be split into tokens.  If \a input is the empty
00214  * sequence, or if \a input is the zero-length string, the result is the empty
00215  * sequence.
00216  * @param pattern The regular expression.  If it matches a zero-length string,
00217  * then an error is raised: [err:FORX0003].
00218  * @param flags The regular expression flags, if any.
00219  * @return a sequence of strings for the tokens.
00220  */
00221 inline
00222 Sequence<String> tokenize( String const &input, String const &pattern,
00223                            char const *flags = "" ) {
00224   return tokenize( input, pattern.c_str(), flags );
00225 }
00226 
00227 ///////////////////////////////////////////////////////////////////////////////
00228 
00229 } // namespace fn
00230 } // namespace zorba
00231 
00232 #endif /* ZORBA_XQUERY_FUNCTIONS_API_H */
00233 /* vim:set et sw=2 ts=2: */
blog comments powered by Disqus