xmlparser.cpp
Go to the documentation of this file.
00001 /*************************************************************************** 00002 file : $URL: http://svn.code.sf.net/p/frepple/code/trunk/src/utils/xmlparser.cpp $ 00003 version : $LastChangedRevision: 1713 $ $LastChangedBy: jdetaeye $ 00004 date : $LastChangedDate: 2012-07-18 11:46:01 +0200 (Wed, 18 Jul 2012) $ 00005 ***************************************************************************/ 00006 00007 /*************************************************************************** 00008 * * 00009 * Copyright (C) 2007-2012 by Johan De Taeye, frePPLe bvba * 00010 * * 00011 * This library is free software; you can redistribute it and/or modify it * 00012 * under the terms of the GNU Affero General Public License as published * 00013 * by the Free Software Foundation; either version 3 of the License, or * 00014 * (at your option) any later version. * 00015 * * 00016 * This library is distributed in the hope that it will be useful, * 00017 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 00018 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 00019 * GNU Affero General Public License for more details. * 00020 * * 00021 * You should have received a copy of the GNU Affero General Public * 00022 * License along with this program. * 00023 * If not, see <http://www.gnu.org/licenses/>. * 00024 * * 00025 ***************************************************************************/ 00026 00027 #define FREPPLE_CORE 00028 #include "frepple/utils.h" 00029 #include <sys/stat.h> 00030 00031 /* Uncomment the next line to create a lot of debugging messages during 00032 * the parsing of XML-data. */ 00033 //#define PARSE_DEBUG 00034 00035 // With VC++ we use the Win32 functions to browse a directory 00036 #ifdef _MSC_VER 00037 #define WIN32_LEAN_AND_MEAN 00038 #include <windows.h> 00039 #else 00040 // With Unix-like systems we use a check suggested by the autoconf tools 00041 #if HAVE_DIRENT_H 00042 # include <dirent.h> 00043 # define NAMLEN(dirent) strlen((dirent)->d_name) 00044 #else 00045 # define dirent direct 00046 # define NAMLEN(dirent) (dirent)->d_namlen 00047 # if HAVE_SYS_NDIR_H 00048 # include <sys/ndir.h> 00049 # endif 00050 # if HAVE_SYS_DIR_H 00051 # include <sys/dir.h> 00052 # endif 00053 # if HAVE_NDIR_H 00054 # include <ndir.h> 00055 # endif 00056 #endif 00057 #endif 00058 00059 00060 namespace frepple 00061 { 00062 namespace utils 00063 { 00064 00065 DECLARE_EXPORT const XMLOutput::content_type XMLOutput::STANDARD = 1; 00066 DECLARE_EXPORT const XMLOutput::content_type XMLOutput::PLAN = 2; 00067 DECLARE_EXPORT const XMLOutput::content_type XMLOutput::PLANDETAIL = 4; 00068 00069 00070 void XMLInput::processingInstruction 00071 (const XMLCh *const target, const XMLCh *const data) 00072 { 00073 char* type = xercesc::XMLString::transcode(target); 00074 char* value = xercesc::XMLString::transcode(data); 00075 try 00076 { 00077 if (!strcmp(type,"python")) 00078 { 00079 // "python" is the only processing instruction which we process. 00080 // Others will be silently ignored 00081 try 00082 { 00083 // Execute the processing instruction 00084 PythonInterpreter::execute(value); 00085 } 00086 catch (const DataException& e) 00087 { 00088 if (abortOnDataException) 00089 { 00090 xercesc::XMLString::release(&type); 00091 xercesc::XMLString::release(&value); 00092 throw; 00093 } 00094 else logger << "Continuing after data error: " << e.what() << endl; 00095 } 00096 } 00097 xercesc::XMLString::release(&type); 00098 xercesc::XMLString::release(&value); 00099 } 00100 catch (...) 00101 { 00102 xercesc::XMLString::release(&type); 00103 xercesc::XMLString::release(&value); 00104 throw; 00105 } 00106 } 00107 00108 00109 void XMLInput::startElement(const XMLCh* const uri, const XMLCh* const n, 00110 const XMLCh* const qname, const xercesc::Attributes& atts) 00111 { 00112 // Validate the state 00113 assert(!states.empty()); 00114 00115 // Check for excessive number of open objects 00116 if (numElements >= maxdepth) 00117 throw DataException("XML-document with elements nested excessively deep"); 00118 00119 // Push the element on the stack 00120 datapair *pElement = &m_EStack[numElements+1]; 00121 pElement->first.reset(n); 00122 pElement->second.reset(); 00123 00124 // Store a pointer to the attributes 00125 attributes = &atts; 00126 00127 switch (states.top()) 00128 { 00129 case SHUTDOWN: 00130 // STATE: Parser is shutting down, and we can ignore all input that 00131 // is still coming 00132 return; 00133 00134 case IGNOREINPUT: 00135 // STATE: Parser is ignoring a part of the input 00136 if (pElement->first.getHash() == endingHashes.top()) 00137 // Increase the count of occurences before the ignore section ends 00138 ++ignore; 00139 ++numElements; 00140 return; 00141 00142 case INIT: 00143 // STATE: The only time the parser comes in this state is when we read 00144 // opening tag of the ROOT tag. 00145 #ifdef PARSE_DEBUG 00146 if (!m_EHStack.empty()) 00147 logger << "Initialize root tag for reading object " 00148 << getCurrentObject() << " (" 00149 << typeid(*getCurrentObject()).name() << ")" << endl; 00150 else 00151 logger << "Initialize root tag for reading object NULL" << endl; 00152 #endif 00153 states.top() = READOBJECT; 00154 endingHashes.push(pElement->first.getHash()); 00155 // Note that there is no break or return here. We also execute the 00156 // statements of the following switch-case. 00157 00158 case READOBJECT: 00159 // STATE: Parser is reading data elements of an object 00160 // Debug 00161 #ifdef PARSE_DEBUG 00162 logger << " Start element " << pElement->first.getName() 00163 << " - object " << getCurrentObject() << endl; 00164 #endif 00165 00166 // Call the handler of the object 00167 assert(!m_EHStack.empty()); 00168 try {getCurrentObject()->beginElement(*this, pElement->first);} 00169 catch (const DataException& e) 00170 { 00171 if (abortOnDataException) throw; 00172 else logger << "Continuing after data error: " << e.what() << endl; 00173 } 00174 00175 // Now process all attributes. For attributes we only call the 00176 // endElement() member and skip the beginElement() method. 00177 numElements += 1; 00178 if (states.top() != IGNOREINPUT) 00179 for (unsigned int i=0, cnt=atts.getLength(); i<cnt; i++) 00180 { 00181 char* val = xercesc::XMLString::transcode(atts.getValue(i)); 00182 m_EStack[numElements+1].first.reset(atts.getLocalName(i)); 00183 m_EStack[numElements+1].second.setData(val); 00184 #ifdef PARSE_DEBUG 00185 char* attname = xercesc::XMLString::transcode(atts.getQName(i)); 00186 logger << " Processing attribute " << attname 00187 << " - object " << getCurrentObject() << endl; 00188 xercesc::XMLString::release(&attname); 00189 #endif 00190 try {getCurrentObject()->endElement(*this, m_EStack[numElements+1].first, m_EStack[numElements+1].second);} 00191 catch (const DataException& e) 00192 { 00193 if (abortOnDataException) throw; 00194 else logger << "Continuing after data error: " << e.what() << endl; 00195 } 00196 xercesc::XMLString::release(&val); 00197 // Stop processing attributes if we are now in the ignore mode 00198 if (states.top() == IGNOREINPUT) break; 00199 } 00200 } // End of switch statement 00201 00202 // Outside of this handler, no attributes are available 00203 attributes = NULL; 00204 } 00205 00206 00207 void XMLInput::endElement(const XMLCh* const uri, 00208 const XMLCh* const s, 00209 const XMLCh* const qname) 00210 { 00211 // Validate the state 00212 assert(numElements >= 0); 00213 assert(!states.empty()); 00214 assert(numElements < maxdepth); 00215 00216 // Remove an element from the stack 00217 datapair *pElement = &(m_EStack[numElements--]); 00218 00219 switch (states.top()) 00220 { 00221 case INIT: 00222 // This should never happen! 00223 throw LogicException("Unreachable code reached"); 00224 00225 case SHUTDOWN: 00226 // STATE: Parser is shutting down, and we can ignore all input that is 00227 // still coming 00228 return; 00229 00230 case IGNOREINPUT: 00231 // STATE: Parser is ignoring a part of the input 00232 #ifdef PARSE_DEBUG 00233 logger << " End element " << pElement->first.getName() 00234 << " - IGNOREINPUT state" << endl; 00235 #endif 00236 // Continue if we aren't dealing with the tag being ignored 00237 if (pElement->first.getHash() != endingHashes.top()) return; 00238 if (ignore == 0) 00239 { 00240 // Finished ignoring now 00241 states.pop(); 00242 endingHashes.pop(); 00243 #ifdef PARSE_DEBUG 00244 logger << "Finish IGNOREINPUT state" << endl; 00245 #endif 00246 } 00247 else 00248 --ignore; 00249 break; 00250 00251 case READOBJECT: 00252 // STATE: Parser is reading data elements of an object 00253 #ifdef PARSE_DEBUG 00254 logger << " End element " << pElement->first.getName() 00255 << " - object " << getCurrentObject() << endl; 00256 #endif 00257 00258 // Check if we finished with the current handler 00259 assert(!m_EHStack.empty()); 00260 if (pElement->first.getHash() == endingHashes.top()) 00261 { 00262 // Call the ending handler of the Object, with a special 00263 // flag to specify that this object is now ended 00264 objectEnded = true; 00265 try {getCurrentObject()->endElement(*this, pElement->first, pElement->second);} 00266 catch (const DataException& e) 00267 { 00268 if (abortOnDataException) throw; 00269 else logger << "Continuing after data error: " << e.what() << endl; 00270 } 00271 objectEnded = false; 00272 #ifdef PARSE_DEBUG 00273 logger << "Finish reading object " << getCurrentObject() << endl; 00274 #endif 00275 // Pop from the handler object stack 00276 prev = getCurrentObject(); 00277 m_EHStack.pop_back(); 00278 endingHashes.pop(); 00279 00280 // Pop from the state stack 00281 states.pop(); 00282 if (m_EHStack.empty()) 00283 shutdown(); 00284 else 00285 { 00286 // Call also the endElement function on the owning object 00287 try {getCurrentObject()->endElement(*this, pElement->first, pElement->second);} 00288 catch (const DataException& e) 00289 { 00290 if (abortOnDataException) throw; 00291 else logger << "Continuing after data error: " << e.what() << endl; 00292 } 00293 #ifdef PARSE_DEBUG 00294 logger << " End element " << pElement->first.getName() 00295 << " - object " << getCurrentObject() << endl; 00296 #endif 00297 } 00298 } 00299 else 00300 // This tag is not the ending tag of an object 00301 // Call the function of the Object 00302 try {getCurrentObject()->endElement(*this, pElement->first, pElement->second);} 00303 catch (const DataException& e) 00304 { 00305 if (abortOnDataException) throw; 00306 else logger << "Continuing after data error: " << e.what() << endl; 00307 } 00308 } 00309 } 00310 00311 00312 // Unfortunately the prototype for this handler function differs between 00313 // Xerces-c 2.x and 3.x 00314 #if XERCES_VERSION_MAJOR==2 00315 void XMLInput::characters(const XMLCh *const c, const unsigned int n) 00316 #else 00317 void XMLInput::characters(const XMLCh *const c, const XMLSize_t n) 00318 #endif 00319 { 00320 // No data capture during the ignore state 00321 if (states.top()==IGNOREINPUT) return; 00322 00323 // Process the data 00324 char* name = xercesc::XMLString::transcode(c); 00325 m_EStack[numElements].second.addData(name, strlen(name)); 00326 xercesc::XMLString::release(&name); 00327 } 00328 00329 00330 void XMLInput::warning(const xercesc::SAXParseException& exception) 00331 { 00332 char* message = xercesc::XMLString::transcode(exception.getMessage()); 00333 logger << "Warning: " << message 00334 << " at line: " << exception.getLineNumber() << endl; 00335 xercesc::XMLString::release(&message); 00336 } 00337 00338 00339 DECLARE_EXPORT void XMLInput::readto(Object * pPI) 00340 { 00341 // Keep track of the tag where this object will end 00342 assert(numElements >= -1); 00343 endingHashes.push(m_EStack[numElements+1].first.getHash()); 00344 if (pPI) 00345 { 00346 // Push a new object on the handler stack 00347 #ifdef PARSE_DEBUG 00348 logger << "Start reading object " << pPI 00349 << " (" << typeid(*pPI).name() << ")" << endl; 00350 #endif 00351 prev = getCurrentObject(); 00352 m_EHStack.push_back(make_pair(pPI,static_cast<void*>(NULL))); 00353 states.push(READOBJECT); 00354 } 00355 else 00356 { 00357 // Ignore the complete content of this element 00358 #ifdef PARSE_DEBUG 00359 logger << "Start ignoring input" << endl; 00360 #endif 00361 states.push(IGNOREINPUT); 00362 } 00363 } 00364 00365 00366 void XMLInput::shutdown() 00367 { 00368 // Already shutting down... 00369 if (states.empty() || states.top() == SHUTDOWN) return; 00370 00371 // Message 00372 #ifdef PARSE_DEBUG 00373 logger << " Forcing a shutdown - SHUTDOWN state" << endl; 00374 #endif 00375 00376 // Change the state 00377 states.push(SHUTDOWN); 00378 00379 // Done if we have no elements on the stack, i.e. a normal end. 00380 if (numElements<0) return; 00381 00382 // Call the ending handling of all objects on the stack 00383 // This allows them to finish off in a valid state, and delete any temporary 00384 // objects they may have allocated. 00385 objectEnded = true; 00386 m_EStack[numElements].first.reset("Not a real tag"); 00387 m_EStack[numElements].second.reset(); 00388 while (!m_EHStack.empty()) 00389 { 00390 try {getCurrentObject()->endElement(*this, m_EStack[numElements].first, m_EStack[numElements].second);} 00391 catch (const DataException& e) 00392 { 00393 if (abortOnDataException) throw; 00394 else logger << "Continuing after data error: " << e.what() << endl; 00395 } 00396 m_EHStack.pop_back(); 00397 } 00398 } 00399 00400 00401 void XMLInput::reset() 00402 { 00403 // Delete the xerces parser object 00404 delete parser; 00405 parser = NULL; 00406 00407 // Call the ending handling of all objects on the stack 00408 // This allows them to finish off in a valid state, and delete any temporary 00409 // objects they may have allocated. 00410 if (!m_EHStack.empty()) 00411 { 00412 // The next line is to avoid calling the endElement handler twice for the 00413 // last object. E.g. endElement handler causes and exception, and as part 00414 // of the exception handling we call the reset method. 00415 if (objectEnded) m_EHStack.pop_back(); 00416 objectEnded = true; 00417 m_EStack[++numElements].first.reset("Not a real tag"); 00418 m_EStack[++numElements].second.reset(); 00419 while (!m_EHStack.empty()) 00420 { 00421 try {getCurrentObject()->endElement(*this, m_EStack[numElements].first, m_EStack[numElements].second);} 00422 catch (const DataException& e) 00423 { 00424 if (abortOnDataException) throw; 00425 else logger << "Continuing after data error: " << e.what() << endl; 00426 } 00427 m_EHStack.pop_back(); 00428 } 00429 } 00430 00431 // Cleanup of stacks 00432 while (!states.empty()) states.pop(); 00433 while (!endingHashes.empty()) endingHashes.pop(); 00434 00435 // Set all variables back to their starting values 00436 numElements = -1; 00437 ignore = 0; 00438 objectEnded = false; 00439 attributes = NULL; 00440 } 00441 00442 00443 void XMLInput::parse(xercesc::InputSource &in, Object *pRoot, bool validate) 00444 { 00445 try 00446 { 00447 // Create a Xerces parser 00448 parser = xercesc::XMLReaderFactory::createXMLReader(); 00449 00450 // Set the features of the parser. A bunch of the options are dependent 00451 // on whether we want to validate the input or not. 00452 parser->setProperty(xercesc::XMLUni::fgXercesScannerName, const_cast<XMLCh*> 00453 (validate ? xercesc::XMLUni::fgSGXMLScanner : xercesc::XMLUni::fgWFXMLScanner)); 00454 parser->setFeature(xercesc::XMLUni::fgSAX2CoreValidation, validate); 00455 parser->setFeature(xercesc::XMLUni::fgSAX2CoreNameSpacePrefixes, false); 00456 parser->setFeature(xercesc::XMLUni::fgXercesIdentityConstraintChecking, false); 00457 parser->setFeature(xercesc::XMLUni::fgXercesDynamic, false); 00458 parser->setFeature(xercesc::XMLUni::fgXercesSchema, validate); 00459 parser->setFeature(xercesc::XMLUni::fgXercesSchemaFullChecking, false); 00460 parser->setFeature(xercesc::XMLUni::fgXercesValidationErrorAsFatal,true); 00461 parser->setFeature(xercesc::XMLUni::fgXercesIgnoreAnnotations,true); 00462 00463 if (validate) 00464 { 00465 // Specify the no-namespace schema file 00466 string schema = Environment::searchFile("frepple.xsd"); 00467 if (schema.empty()) 00468 throw RuntimeException("Can't find XML schema file 'frepple.xsd'"); 00469 XMLCh *c = xercesc::XMLString::transcode(schema.c_str()); 00470 parser->setProperty( 00471 xercesc::XMLUni::fgXercesSchemaExternalNoNameSpaceSchemaLocation, c 00472 ); 00473 xercesc::XMLString::release(&c); 00474 } 00475 00476 // If we are reading into a NULL object, there is no need to use a 00477 // content handler or a handler stack. 00478 if (pRoot) 00479 { 00480 // Set the event handler. If we are reading into a NULL object, there is 00481 // no need to use a content handler. 00482 parser->setContentHandler(this); 00483 00484 // Get the parser to read data into the object pRoot. 00485 m_EHStack.push_back(make_pair(pRoot,static_cast<void*>(NULL))); 00486 states.push(INIT); 00487 } 00488 00489 // Set the error handler 00490 parser->setErrorHandler(this); 00491 00492 // Parse the input 00493 parser->parse(in); 00494 } 00495 // Note: the reset() method needs to be called in all circumstances. The 00496 // reset method allows all objects to finish in a valid state and clean up 00497 // any memory they may have allocated. 00498 catch (const xercesc::XMLException& toCatch) 00499 { 00500 char* message = xercesc::XMLString::transcode(toCatch.getMessage()); 00501 string msg(message); 00502 xercesc::XMLString::release(&message); 00503 reset(); 00504 throw RuntimeException("Parsing error: " + msg); 00505 } 00506 catch (const xercesc::SAXParseException& toCatch) 00507 { 00508 char* message = xercesc::XMLString::transcode(toCatch.getMessage()); 00509 ostringstream msg; 00510 if (toCatch.getLineNumber() > 0) 00511 msg << "Parsing error: " << message << " at line " << toCatch.getLineNumber(); 00512 else 00513 msg << "Parsing error: " << message; 00514 xercesc::XMLString::release(&message); 00515 reset(); 00516 throw RuntimeException(msg.str()); 00517 } 00518 catch (const exception& toCatch) 00519 { 00520 reset(); 00521 ostringstream msg; 00522 msg << "Error during XML parsing: " << toCatch.what(); 00523 throw RuntimeException(msg.str()); 00524 } 00525 catch (...) 00526 { 00527 reset(); 00528 throw RuntimeException( 00529 "Parsing error: Unexpected exception during XML parsing"); 00530 } 00531 reset(); 00532 } 00533 00534 00535 DECLARE_EXPORT ostream& operator << (ostream& os, const XMLEscape& x) 00536 { 00537 for (const char* p = x.data; *p; ++p) 00538 { 00539 switch (*p) 00540 { 00541 case '&': os << "&"; break; 00542 case '<': os << "<"; break; 00543 case '>': os << ">"; break; 00544 case '"': os << """; break; 00545 case '\'': os << "'"; break; 00546 default: os << *p; 00547 } 00548 } 00549 return os; 00550 } 00551 00552 00553 DECLARE_EXPORT void XMLOutput::incIndent() 00554 { 00555 indentstring[m_nIndent++] = '\t'; 00556 if (m_nIndent > 40) m_nIndent = 40; 00557 indentstring[m_nIndent] = '\0'; 00558 } 00559 00560 00561 DECLARE_EXPORT void XMLOutput::decIndent() 00562 { 00563 if (--m_nIndent < 0) m_nIndent = 0; 00564 indentstring[m_nIndent] = '\0'; 00565 } 00566 00567 00568 DECLARE_EXPORT void XMLOutput::writeElement 00569 (const Keyword& tag, const Object* object, mode m) 00570 { 00571 // Avoid NULL pointers and skip hidden objects 00572 if (!object || object->getHidden()) return; 00573 00574 // Adjust current and parent object pointer 00575 const Object *previousParent = parentObject; 00576 parentObject = currentObject; 00577 currentObject = object; 00578 ++numObjects; 00579 ++numParents; 00580 00581 // Call the write method on the object 00582 if (m != DEFAULT) 00583 // Mode is overwritten 00584 object->writeElement(this, tag, m); 00585 else 00586 // Choose wether to save a reference of the object. 00587 // The root object can't be saved as a reference. 00588 object->writeElement(this, tag, numParents>2 ? REFERENCE : DEFAULT); 00589 00590 // Adjust current and parent object pointer 00591 --numParents; 00592 currentObject = parentObject; 00593 parentObject = previousParent; 00594 } 00595 00596 00597 DECLARE_EXPORT void XMLOutput::writeElementWithHeader(const Keyword& tag, const Object* object) 00598 { 00599 // Root object can't be null... 00600 if (!object) 00601 throw RuntimeException("Can't accept a NULL object as XML root"); 00602 00603 // There should not be any saved objects yet 00604 if (numObjects > 0) 00605 throw LogicException("Can't have multiple headers in a document"); 00606 assert(!parentObject); 00607 assert(!currentObject); 00608 00609 // Write the first line for the xml document 00610 writeString(getHeaderStart()); 00611 00612 // Adjust current object pointer 00613 currentObject = object; 00614 00615 // Write the object 00616 ++numObjects; 00617 ++numParents; 00618 BeginObject(tag, getHeaderAtts()); 00619 object->writeElement(this, tag, NOHEADER); 00620 00621 // Adjust current and parent object pointer 00622 currentObject = NULL; 00623 parentObject = NULL; 00624 } 00625 00626 00627 DECLARE_EXPORT void XMLOutput::writeHeader(const Keyword& tag) 00628 { 00629 // There should not be any saved objects yet 00630 if (numObjects > 0 || !parentObject || !currentObject) 00631 throw LogicException("Writing invalid header to XML document"); 00632 00633 // Write the first line and the opening tag 00634 writeString(getHeaderStart()); 00635 BeginObject(tag, getHeaderAtts()); 00636 00637 // Fake a dummy parent 00638 numParents += 2; 00639 } 00640 00641 00642 DECLARE_EXPORT bool XMLElement::getBool() const 00643 { 00644 switch (getData()[0]) 00645 { 00646 case 'T': 00647 case 't': 00648 case '1': 00649 return true; 00650 case 'F': 00651 case 'f': 00652 case '0': 00653 return false; 00654 } 00655 throw DataException("Invalid boolean value: " + string(getData())); 00656 } 00657 00658 00659 DECLARE_EXPORT const char* Attribute::getName() const 00660 { 00661 if (ch) return ch; 00662 Keyword::tagtable::const_iterator i = Keyword::getTags().find(hash); 00663 if (i == Keyword::getTags().end()) 00664 throw LogicException("Undefined element keyword"); 00665 return i->second->getName().c_str(); 00666 } 00667 00668 00669 DECLARE_EXPORT Keyword::Keyword(const string& name) : strName(name) 00670 { 00671 // Error condition: name is empty 00672 if (name.empty()) throw LogicException("Creating keyword without name"); 00673 00674 // Create a number of variations of the tag name 00675 strStartElement = string("<") + name; 00676 strEndElement = string("</") + name + ">\n"; 00677 strElement = string("<") + name + ">"; 00678 strAttribute = string(" ") + name + "=\""; 00679 00680 // Compute the hash value 00681 dw = hash(name.c_str()); 00682 00683 // Create a properly encoded Xerces string 00684 xercesc::XMLPlatformUtils::Initialize(); 00685 xmlname = xercesc::XMLString::transcode(name.c_str()); 00686 00687 // Verify that the hash is "perfect". 00688 check(); 00689 } 00690 00691 00692 DECLARE_EXPORT Keyword::Keyword(const string& name, const string& nspace) 00693 : strName(name) 00694 { 00695 // Error condition: name is empty 00696 if (name.empty()) 00697 throw LogicException("Creating keyword without name"); 00698 if (nspace.empty()) 00699 throw LogicException("Creating keyword with empty namespace"); 00700 00701 // Create a number of variations of the tag name 00702 strStartElement = string("<") + nspace + ":" + name; 00703 strEndElement = string("</") + nspace + ":" + name + ">\n"; 00704 strElement = string("<") + nspace + ":" + name + ">"; 00705 strAttribute = string(" ") + nspace + ":" + name + "=\""; 00706 00707 // Compute the hash value 00708 dw = hash(name); 00709 00710 // Create a properly encoded Xerces string 00711 xercesc::XMLPlatformUtils::Initialize(); 00712 xmlname = xercesc::XMLString::transcode(string(nspace + ":" + name).c_str()); 00713 00714 // Verify that the hash is "perfect". 00715 check(); 00716 } 00717 00718 00719 void Keyword::check() 00720 { 00721 // To be thread-safe we make sure only a single thread at a time 00722 // can execute this check. 00723 static Mutex dd; 00724 { 00725 ScopeMutexLock l(dd); 00726 tagtable::const_iterator i = getTags().find(dw); 00727 if (i!=getTags().end() && i->second->getName()!=strName) 00728 throw LogicException("Tag XML-tag hash function clashes for " 00729 + i->second->getName() + " and " + strName); 00730 getTags().insert(make_pair(dw,this)); 00731 } 00732 } 00733 00734 00735 DECLARE_EXPORT Keyword::~Keyword() 00736 { 00737 // Remove from the tag list 00738 tagtable::iterator i = getTags().find(dw); 00739 if (i!=getTags().end()) getTags().erase(i); 00740 00741 // Destroy the xerces string 00742 xercesc::XMLString::release(&xmlname); 00743 xercesc::XMLPlatformUtils::Terminate(); 00744 } 00745 00746 00747 DECLARE_EXPORT const Keyword& Keyword::find(const char* name) 00748 { 00749 tagtable::const_iterator i = getTags().find(hash(name)); 00750 return *(i!=getTags().end() ? i->second : new Keyword(name)); 00751 } 00752 00753 00754 DECLARE_EXPORT Keyword::tagtable& Keyword::getTags() 00755 { 00756 static tagtable alltags; 00757 return alltags; 00758 } 00759 00760 00761 DECLARE_EXPORT hashtype Keyword::hash(const char* c) 00762 { 00763 if (c == 0 || *c == 0) return 0; 00764 00765 // Compute hash 00766 const char* curCh = c; 00767 hashtype hashVal = *curCh++; 00768 while (*curCh) 00769 hashVal = (hashVal * 38) + (hashVal >> 24) + *curCh++; 00770 00771 // Divide by modulus 00772 return hashVal % 954991; 00773 } 00774 00775 00776 DECLARE_EXPORT hashtype Keyword::hash(const XMLCh* t) 00777 { 00778 char* c = xercesc::XMLString::transcode(t); 00779 if (c == 0 || *c == 0) 00780 { 00781 xercesc::XMLString::release(&c); 00782 return 0; 00783 } 00784 00785 // Compute hash 00786 const char* curCh = c; 00787 hashtype hashVal = *curCh++; 00788 while (*curCh) 00789 hashVal = (hashVal * 38) + (hashVal >> 24) + *curCh++; 00790 00791 // Divide by modulus 00792 xercesc::XMLString::release(&c); 00793 return hashVal % 954991; 00794 } 00795 00796 00797 DECLARE_EXPORT void Keyword::printTags() 00798 { 00799 for (tagtable::iterator i = getTags().begin(); i != getTags().end(); ++i) 00800 logger << i->second->getName() << " " << i->second->dw << endl; 00801 } 00802 00803 00804 void XMLInputFile::parse(Object *pRoot, bool validate) 00805 { 00806 // Check if string has been set 00807 if (filename.empty()) 00808 throw DataException("Missing input file or directory"); 00809 00810 // Check if the parameter is the name of a directory 00811 struct stat stat_p; 00812 if (stat(filename.c_str(), &stat_p)) 00813 // Can't verify the status 00814 throw RuntimeException("Couldn't open input file '" + filename + "'"); 00815 else if (stat_p.st_mode & S_IFDIR) 00816 { 00817 // Data is a directory: loop through all *.xml files now. No recursion in 00818 // subdirectories is done. 00819 // The code is unfortunately different for Windows & Linux. Sigh... 00820 #ifdef _MSC_VER 00821 string f = filename + "\\*.xml"; 00822 WIN32_FIND_DATA dir_entry_p; 00823 HANDLE h = FindFirstFile(f.c_str(), &dir_entry_p); 00824 if (h == INVALID_HANDLE_VALUE) 00825 throw RuntimeException("Couldn't open input file '" + f + "'"); 00826 do 00827 { 00828 f = filename + '/' + dir_entry_p.cFileName; 00829 XMLInputFile(f.c_str()).parse(pRoot); 00830 } 00831 while (FindNextFile(h, &dir_entry_p)); 00832 FindClose(h); 00833 #elif HAVE_DIRENT_H 00834 struct dirent *dir_entry_p; 00835 DIR *dir_p = opendir(filename.c_str()); 00836 while (NULL != (dir_entry_p = readdir(dir_p))) 00837 { 00838 int n = NAMLEN(dir_entry_p); 00839 if (n > 4 && !strcmp(".xml", dir_entry_p->d_name + n - 4)) 00840 { 00841 string f = filename + '/' + dir_entry_p->d_name; 00842 XMLInputFile(f.c_str()).parse(pRoot, validate); 00843 } 00844 } 00845 closedir(dir_p); 00846 #else 00847 throw RuntimeException("Can't process a directory on your platform"); 00848 #endif 00849 } 00850 else 00851 { 00852 // Normal file 00853 // Parse the file 00854 XMLCh *f = xercesc::XMLString::transcode(filename.c_str()); 00855 xercesc::LocalFileInputSource in(f); 00856 xercesc::XMLString::release(&f); 00857 XMLInput::parse(in, pRoot, validate); 00858 } 00859 } 00860 00861 } // end namespace 00862 } // end namespace