bes  Updated for version 3.20.8
SaxParserWrapper.cc
1 // This file is part of the "NcML Module" project, a BES module designed
// to allow NcML files to be used as a wrapper to add
4 // AIS to existing datasets of any format.
5 //
6 // Copyright (c) 2009 OPeNDAP, Inc.
7 // Author: Michael Johnson <m.johnson@opendap.org>
8 //
9 // For more information, please also see the main website: http://opendap.org/
10 //
11 // This library is free software; you can redistribute it and/or
12 // modify it under the terms of the GNU Lesser General Public
13 // License as published by the Free Software Foundation; either
14 // version 2.1 of the License, or (at your option) any later version.
15 //
16 // This library is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 // Lesser General Public License for more details.
20 //
21 // You should have received a copy of the GNU Lesser General Public
22 // License along with this library; if not, write to the Free Software
23 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 //
// Please see the files COPYING and COPYRIGHT for more information on the LGPL.
26 //
27 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
29 
30 #include "config.h"
31 
32 #include "SaxParserWrapper.h"
33 
34 #include <exception>
35 #include <iostream>
36 #include <libxml/parser.h>
37 #include <libxml/xmlstring.h>
38 #include <cstdio> // for vsnprintf
39 #include <string>
40 
41 #include "BESDebug.h"
42 #include "BESError.h"
43 #include "BESInternalError.h"
44 #include "BESInternalFatalError.h"
45 #include "BESSyntaxUserError.h"
46 #include "BESForbiddenError.h"
47 #include "BESNotFoundError.h"
48 #include "NCMLDebug.h"
49 #include "SaxParser.h"
50 #include "XMLHelpers.h"
51 
52 // Toggle to tell the parser to use the Sax2 start/end element
53 // calls with namespace information.
54 // [ TODO We probably want to remove the non-namespace pathways at some point,
55 // but I will leave them here for now in case there's issues ]
56 #define NCML_PARSER_USE_SAX2_NAMESPACES 1
57 
58 using namespace std;
59 using namespace ncml_module;
60 
62 // Helpers
63 
64 #if NCML_PARSER_USE_SAX2_NAMESPACES
65 static const int SAX2_NAMESPACE_ATTRIBUTE_ARRAY_STRIDE = 5;
66 static int toXMLAttributeMapWithNamespaces(XMLAttributeMap& attrMap, const xmlChar** attributes, int num_attributes)
67 {
68  attrMap.clear();
69  for (int i = 0; i < num_attributes; ++i) {
70  XMLAttribute attr;
71  attr.fromSAX2NamespaceAttributes(attributes);
72  attributes += SAX2_NAMESPACE_ATTRIBUTE_ARRAY_STRIDE; // jump to start of next record
73  attrMap.addAttribute(attr);
74  }
75  return num_attributes;
76 }
77 #else
78 // Assumes the non-namespace calls, so attrs is stride 2 {name,value}
79 static int toXMLAttributeMapNoNamespaces(XMLAttributeMap& attrMap, const xmlChar** attrs)
80 {
81  attrMap.clear();
82  int count=0;
83  while (attrs && *attrs != NULL)
84  {
85  XMLAttribute attr;
86  attr.localname = XMLUtil::xmlCharToString(*attrs);
87  attr.value = XMLUtil::xmlCharToString(*(attrs+1));
88  attrMap.addAttribute(attr);
89  attrs += 2;
90  count++;
91  }
92  return count;
93 }
94 #endif // NCML_PARSER_USE_SAX2_NAMESPACES
95 
97 // Callback we will register that just pass on to our C++ engine
98 //
99 // NOTE WELL: New C handlers need to follow the given
100 // other examples in order to avoid memory leaks
101 // in libxml during an exception!
102 
// To avoid cut & paste below, we use a pair of macros to cast the void* into the wrapper and
// set up a proper error handling structure around the main call.
// BEGIN_SAFE_PARSER_BLOCK internally defines the symbol "parser" to be the SaxParser contained in the wrapper.
// So, for example, a safe handler call to SaxParser would look like:
// static void ncmlStartDocument(void* userData)
// {
//     BEGIN_SAFE_PARSER_BLOCK(userData) // pass in the void*, which is a SaxParserWrapper*
//     parser.onStartDocument();         // call the dispatch on the wrapped parser using the autodefined name parser
//     END_SAFE_PARSER_BLOCK             // end the error handling wrapper
// }
113 
// Opens the guarded region of a SAX callback. Must be closed by
// END_SAFE_PARSER_BLOCK in the same function, which must return void.
//
// argName is the callback's void* user data; it is cast to SaxParserWrapper*
// and bound to the local _spw_. If the wrapper is already in the exception
// state the callback returns immediately. Otherwise a try block is opened,
// the local reference "parser" is bound to the wrapped SaxParser, and the
// parser is told the wrapper's current parse line.
#define BEGIN_SAFE_PARSER_BLOCK(argName) { \
    SaxParserWrapper* _spw_ = static_cast<SaxParserWrapper*>(argName); \
    if (_spw_->isExceptionState()) { \
        return; \
    } \
    else { \
        try { \
            SaxParser& parser = _spw_->getParser(); \
            parser.setParseLineNumber(_spw_->getCurrentParseLine());
126 
// Closes the region opened by BEGIN_SAFE_PARSER_BLOCK; required after the
// actual calls to the parser.
//
// Anything thrown inside the region is caught here and handed to the
// wrapper's deferException() instead of propagating out of the C callback:
//   - BESError is deferred as-is;
//   - std::exception is wrapped in a BESInternalError carrying what();
//   - any other exception is replaced by a generic BESInternalError.
#define END_SAFE_PARSER_BLOCK } \
        catch (BESError& besErr) { \
            BESDEBUG("ncml", "Caught BESError&, deferring..." << endl); \
            _spw_->deferException(besErr); \
        } \
        catch (std::exception& stdErr) { \
            BESDEBUG("ncml", "Caught std::exception&, wrapping and deferring..." << endl); \
            BESInternalError _badness_("Wrapped std::exception.what()=" + string(stdErr.what()), __FILE__, __LINE__); \
            _spw_->deferException(_badness_); \
        } \
        catch (...) { \
            BESDEBUG("ncml", "Caught unknown (...) exception: deferring default error." << endl); \
            BESInternalError _badness_("SaxParserWrapper:: Unknown Exception Type: ", __FILE__, __LINE__); \
            _spw_->deferException(_badness_); \
        } \
    } \
}
148 
150 // Our C SAX callbacks, wrapped carefully.
151 
152 static void ncmlStartDocument(void* userData)
153 {
154  BEGIN_SAFE_PARSER_BLOCK(userData)
155 
156  parser.onStartDocument();
157 
158  END_SAFE_PARSER_BLOCK
159 }
160 
161 static void ncmlEndDocument(void* userData)
162 {
163  BEGIN_SAFE_PARSER_BLOCK(userData)
164 
165  parser.onEndDocument();
166 
167  END_SAFE_PARSER_BLOCK
168 }
169 
170 #if !NCML_PARSER_USE_SAX2_NAMESPACES
171 
172 static void ncmlStartElement(void * userData,
173  const xmlChar * name,
174  const xmlChar ** attrs)
175 {
176  // BESDEBUG("ncml", "ncmlStartElement called for:<" << name << ">" << endl);
177  BEGIN_SAFE_PARSER_BLOCK(1)
178 
179  string nameS = XMLUtil::xmlCharToString(name);
180  XMLAttributeMap map;
181  toXMLAttributeMapNoNamespaces(map, attrs);
182 
183  // These args will be valid for the scope of the call.
184  parser.onStartElement(nameS, map);
185 
186  END_SAFE_PARSER_BLOCK
187 }
188 
189 static void ncmlEndElement(void * userData,
190  const xmlChar * name)
191 {
192  BEGIN_SAFE_PARSER_BLOCK(1)
193 
194  string nameS = XMLUtil::xmlCharToString(name);
195  parser.onEndElement(nameS);
196 
197  END_SAFE_PARSER_BLOCK
198 }
199 #endif // !NCML_PARSER_USE_SAX2_NAMESPACES
200 
201 #if NCML_PARSER_USE_SAX2_NAMESPACES
202 static
203 void ncmlSax2StartElementNs(void *userData, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI,
204  int nb_namespaces, const xmlChar **namespaces, int nb_attributes, int /* nb_defaulted */,
205  const xmlChar **attributes)
206 {
207  // BESDEBUG("ncml", "ncmlStartElement called for:<" << name << ">" << endl);
208  BEGIN_SAFE_PARSER_BLOCK(userData)
209 
210  BESDEBUG("ncml", "SaxParserWrapper::ncmlSax2StartElementNs() - localname:" << localname << endl);
211 
212  XMLAttributeMap attrMap;
213  toXMLAttributeMapWithNamespaces(attrMap, attributes, nb_attributes);
214 
215  XMLNamespaceMap nsMap;
216  nsMap.fromSAX2Namespaces(namespaces, nb_namespaces);
217 
218  // These args will be valid for the scope of the call.
219  string localnameString = XMLUtil::xmlCharToString(localname);
220  string prefixString = XMLUtil::xmlCharToString(prefix);
221  string uriString = XMLUtil::xmlCharToString(URI);
222 
223  parser.onStartElementWithNamespace(
224  localnameString,
225  prefixString,
226  uriString,
227  attrMap,
228  nsMap);
229 
230  END_SAFE_PARSER_BLOCK
231 }
232 
233 static
234 void ncmlSax2EndElementNs(void *userData, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI)
235 {
236  BEGIN_SAFE_PARSER_BLOCK(userData)
237 
238  string localnameString = XMLUtil::xmlCharToString(localname);
239  string prefixString = XMLUtil::xmlCharToString(prefix);
240  string uriString = XMLUtil::xmlCharToString(URI);
241  parser.onEndElementWithNamespace(localnameString, prefixString, uriString);
242 
243  END_SAFE_PARSER_BLOCK
244 }
245 #endif // NCML_PARSER_USE_SAX2_NAMESPACES
246 
247 static void ncmlCharacters(void* userData, const xmlChar* content, int len)
248 {
249  BEGIN_SAFE_PARSER_BLOCK(userData)
250 
251  // len is since the content string might not be null terminated,
252  // so we have to build out own and pass it up special....
253  // TODO consider just using these xmlChar's upstairs to avoid copies, or make an adapter or something.
254  string characters("");
255  characters.reserve(len);
256  const xmlChar* contentEnd = content+len;
257  while(content != contentEnd)
258  {
259  characters += (const char)(*content++);
260  }
261 
262  parser.onCharacters(characters);
263 
264  END_SAFE_PARSER_BLOCK
265 }
266 
267 static void ncmlWarning(void* userData, const char* msg, ...)
268 {
269  BEGIN_SAFE_PARSER_BLOCK(userData)
270 
271  BESDEBUG("ncml", "SaxParserWrapper::ncmlWarning() - msg:" << msg << endl);
272 
273  char buffer[1024];
274  va_list(args);
275  va_start(args, msg);
276  unsigned int len = sizeof(buffer);
277  vsnprintf(buffer, len, msg, args);
278  va_end(args);
279  parser.onParseWarning(string(buffer));
280 
281  END_SAFE_PARSER_BLOCK
282 }
283 
284 static void ncmlFatalError(void* userData, const char* msg, ...)
285 {
286  BEGIN_SAFE_PARSER_BLOCK(userData)
287 
288  BESDEBUG("ncml", "SaxParserWrapper::ncmlFatalError() - msg:" << msg << endl);
289 
290  char buffer[1024];
291  va_list(args);
292  va_start(args, msg);
293  unsigned int len = sizeof(buffer);
294  vsnprintf(buffer, len, msg, args);
295  va_end(args);
296  parser.onParseError(string(buffer));
297 
298  END_SAFE_PARSER_BLOCK
299 }
300 
302 // class SaxParserWrapper impl
303 
304 SaxParserWrapper::SaxParserWrapper(SaxParser& parser) :
305  _parser(parser), _handler(), _state(NOT_PARSING), _errorMsg(""), _errorType(0), _errorFile(""), _errorLine(-1)
306 {
307 }
308 
309 SaxParserWrapper::~SaxParserWrapper()
310 {
311  // Really not much to do... everything cleans itself up.
312  _state = NOT_PARSING;
313 
314  // Leak fix. jhrg 6/21/19
315  cleanupParser();
316 }
317 
318 bool SaxParserWrapper::parse(const string& ncmlFilename)
319 {
320  // It's illegal to call this until it's done.
321  if (_state == PARSING) {
322  throw BESInternalError("Parse called again while already in parse.", __FILE__, __LINE__);
323  }
324 
325  // OK, now we're parsing
326  _state = PARSING;
327 
328  setupParser();
329 
330  bool success = xmlSAXUserParseFile(&_handler, this, ncmlFilename.c_str());
331 
332  // If we deferred an exception during the libxml parse call, now's the time to rethrow it.
333  if (isExceptionState()) {
335  }
336 
337  // Otherwise, we're also done parsing.
338  _state = NOT_PARSING;
339  return success;
340 }
341 
343 {
344  _state = EXCEPTION;
345  _errorType = theErr.get_bes_error_type();
346  _errorMsg = theErr.get_message();
347  _errorLine = theErr.get_line();
348  _errorFile = theErr.get_file();
349 }
350 
351 // HACK admittedly a little gross, but it's weird to have to copy an exception
352 // and this seemed the safest way rather than making dynamic storage, etc.
354 {
355  // Clear our state out so we can parse again though.
356  _state = NOT_PARSING;
357 
358  switch (_errorType) {
359  case BES_INTERNAL_ERROR:
360  throw BESInternalError(_errorMsg, _errorFile, _errorLine);
361 
362  case BES_INTERNAL_FATAL_ERROR:
363  throw BESInternalFatalError(_errorMsg, _errorFile, _errorLine);
364 
365  case BES_SYNTAX_USER_ERROR:
366  throw BESSyntaxUserError(_errorMsg, _errorFile, _errorLine);
367 
368  case BES_FORBIDDEN_ERROR:
369  throw BESForbiddenError(_errorMsg, _errorFile, _errorLine);
370 
371  case BES_NOT_FOUND_ERROR:
372  throw BESNotFoundError(_errorMsg, _errorFile, _errorLine);
373 
374  default:
375  throw BESInternalError("Unknown exception type.", __FILE__, __LINE__);
376  }
377 }
378 
380 {
381 #if 0
382  if (_context) {
383  return xmlSAX2GetLineNumber(_context);
384  }
385  else {
386  return -1;
387  }
388 #endif
389  return -1; //FIXME part of leak fix. jhrg 6.21.19
390 }
391 
392 static void setAllHandlerCBToNulls(xmlSAXHandler& h)
393 {
394  h.internalSubset = 0;
395  h.isStandalone = 0;
396  h.hasInternalSubset = 0;
397  h.hasExternalSubset = 0;
398  h.resolveEntity = 0;
399  h.getEntity = 0;
400  h.entityDecl = 0;
401  h.notationDecl = 0;
402  h.attributeDecl = 0;
403  h.elementDecl = 0;
404  h.unparsedEntityDecl = 0;
405  h.setDocumentLocator = 0;
406  h.startDocument = 0;
407  h.endDocument = 0;
408  h.startElement = 0;
409  h.endElement = 0;
410  h.reference = 0;
411  h.characters = 0;
412  h.ignorableWhitespace = 0;
413  h.processingInstruction = 0;
414  h.comment = 0;
415  h.warning = 0;
416  h.error = 0;
417  h.fatalError = 0;
418  h.getParameterEntity = 0;
419  h.cdataBlock = 0;
420  h.externalSubset = 0;
421 
422  // unsigned int initialized; magic number the init should fill in
423  /* The following fields are extensions available only on version 2 */
424  // void *_private; //i'd assume i don't set this either...
425  h.startElementNs = 0;
426  h.endElementNs = 0;
427  h.serror = 0;
428 }
429 
430 void SaxParserWrapper::setupParser()
431 {
432  // setup the handler for version 2,
433  // which sets an internal version magic number
434  // into _handler.initialized
435  // but which doesn't clear the handlers to 0.
436  xmlSAXVersion(&_handler, 2);
437 
438  // Initialize all handlers to 0 by hand to start
439  // so we don't blow those internal magic numbers.
440  setAllHandlerCBToNulls(_handler);
441 
442  // Put our static functions into the handler
443  _handler.startDocument = ncmlStartDocument;
444  _handler.endDocument = ncmlEndDocument;
445  _handler.warning = ncmlWarning;
446  _handler.error = ncmlFatalError;
447  _handler.fatalError = ncmlFatalError;
448  _handler.characters = ncmlCharacters;
449 
450  // We'll use one or the other until we're sure it works.
451 #if NCML_PARSER_USE_SAX2_NAMESPACES
452  _handler.startElement = 0;
453  _handler.endElement = 0;
454  _handler.startElementNs = ncmlSax2StartElementNs;
455  _handler.endElementNs = ncmlSax2EndElementNs;
456 #else
457  _handler.startElement = ncmlStartElement;
458  _handler.endElement = ncmlEndElement;
459  _handler.startElementNs = 0;
460  _handler.endElementNs = 0;
461 #endif // NCML_PARSER_USE_SAX2_NAMESPACES
462 }
463 
464 // Leak fix. jhrg 6/21/19
465 void SaxParserWrapper::cleanupParser() throw ()
466 {
467 }
Abstract exception class for the BES with basic string message.
Definition: BESError.h:58
virtual int get_bes_error_type()
Return the return code for this error class.
Definition: BESError.h:143
virtual int get_line()
get the line number where the exception was thrown
Definition: BESError.h:115
virtual std::string get_file()
get the file name where the exception was thrown
Definition: BESError.h:107
virtual std::string get_message()
get the error message for this exception
Definition: BESError.h:99
error thrown if the BES is not allowed to access the resource requested
exception thrown if internal error encountered
exception thrown if an internal error is found and is fatal to the BES
error thrown if the resource requested cannot be found
error thrown if there is a user syntax error in the request or any other user error
bool parse(const std::string &ncmlFilename)
Do a SAX parse of the ncmlFilename and pass the calls to wrapper parser.
void deferException(BESError &theErr)
The remaining calls are for the internals of the parser, but need to be public.
Interface class for the wrapper between libxml C SAX parser and our NCMLParser.
Definition: SaxParser.h:48
void addAttribute(const XMLAttribute &attribute)
Definition: XMLHelpers.cc:167
void fromSAX2Namespaces(const xmlChar **pNamespaces, int numNamespaces)
Definition: XMLHelpers.cc:320
NcML Parser for adding/modifying/removing metadata (attributes) to existing local datasets using NcML...
void fromSAX2NamespaceAttributes(const xmlChar **chunkOfFivePointers)
Definition: XMLHelpers.cc:94