34 #include <libxml/parserInternals.h>
41 #include <D4Attributes.h>
44 #include <D4BaseTypeFactory.h>
46 #include <DapXmlNamespaces.h>
49 #include <BESInternalError.h>
51 #include <BESCatalog.h>
52 #include <BESCatalogUtils.h>
53 #include <BESCatalogList.h>
55 #include <TheBESKeys.h>
58 #include "DmrppParserSax2.h"
59 #include "DmrppCommon.h"
61 #include "DmrppNames.h"
62 #include "DmrppArray.h"
64 #include "CurlUtils.h"
// Size constants (in bytes) available to the parser.
// NOTE(review): each of the three size macros below expands WITH a trailing
// ';'. That only compiles where the macro is the last token of a statement;
// using one inside an expression or an array bound would be a syntax error.
// Consider dropping the ';' (or switching to constants) after checking all uses.
68 #define FIVE_12K 524288;
69 #define ONE_MB 1048576;
70 #define MAX_INPUT_LINE_LENGTH ONE_MB;
// Log-message prefix of the form "DmrppParserSax2::<function>() - ", built
// from __func__ at the point of use.
72 #define prolog std::string("DmrppParserSax2::").append(__func__).append("() - ")
// Namespace URI of DMR++ elements. dmr_start_element() compares each
// element's namespace URI against this string to recognize dmrpp:* elements.
74 static const string dmrpp_namespace =
"http://xml.opendap.org/dap/dmrpp/1.0.0#";
// Printable names of the parser states. Debug output indexes this table with
// the value returned by get_state().
// NOTE(review): presumably the entries must stay in the same order as the
// ParseState enumerators declared in the header - confirm before editing.
83 static const char *states[] = {
89 "inside_attribute_container",
91 "inside_attribute_value",
92 "inside_other_xml_attribute",
101 "inside_constructor",
103 "inside_dmrpp_object",
104 "inside_dmrpp_chunkDimensionSizes_element",
105 "inside_dmrpp_compact_element",
108 "parser_fatal_error",
// Returns true when the C strings `name` and `tag` differ (strcmp() != 0).
// The parser uses it to ask "is this element NOT the one called `tag`?".
// Both pointers must be non-null, as required by strcmp().
112 static bool is_not(
const char *name,
const char *tag)
114 return strcmp(name, tag) != 0;
// Accessor for the Enumeration definition currently being built.
// A D4EnumDef is allocated lazily: only when d_enum_def is still null.
127 DmrppParserSax2::enum_def()
129 if (!d_enum_def) d_enum_def =
new D4EnumDef;
// Accessor for the Dimension definition currently being built.
// A D4Dimension is allocated lazily: only when d_dim_def is still null.
141 DmrppParserSax2::dim_def()
143 if (!d_dim_def) d_dim_def =
new D4Dimension;
// Looks up the attribute called `name` in a libxml2 SAX2 attribute array and
// returns its value as a string.
//
// The array is walked in strides of five pointers per attribute; slot 0 is
// compared against `name`, and slots 3 and 4 are used as the [begin, end)
// range of the value text (the value is not NUL-terminated at `end`).
//
// @param name            attribute name to search for
// @param attributes      SAX2 attribute array (5 pointers per attribute)
// @param num_attributes  number of attributes in the array
//
// NOTE(review): strncmp() is limited to name.length() characters, so this is
// a PREFIX match - looking for "name" also matches an attribute whose name
// merely starts with "name". The first such attribute in document order wins.
155 string DmrppParserSax2::get_attribute_val(
const string &name,
const xmlChar **attributes,
int num_attributes)
157 unsigned int index = 0;
158 for (
int i = 0; i < num_attributes; ++i, index += 5) {
159 if (strncmp(name.c_str(), (
const char *)attributes[index], name.length()) == 0) {
160 return string((
const char *)attributes[index+3], (
const char *)attributes[index+4]);
// Rebuilds the xml_attrs map from a libxml2 SAX2 attribute array.
//
// Any previous contents of xml_attrs are discarded first. Each attribute
// occupies five consecutive pointers; the map key is the string at slot 0 and
// the XMLAttribute is constructed from the remaining four slots
// (attributes + index + 1).
//
// @param attributes     SAX2 attribute array (5 pointers per attribute)
// @param nb_attributes  number of attributes in the array
172 void DmrppParserSax2::transfer_xml_attrs(
const xmlChar **attributes,
int nb_attributes)
174 if (!xml_attrs.empty()) xml_attrs.clear();
178 unsigned int index = 0;
179 for (
int i = 0; i < nb_attributes; ++i, index += 5) {
181 map<string, XMLAttribute>::value_type(
string((
const char *) attributes[index]),
182 XMLAttribute(attributes + index + 1)));
// Debug trace of the entry just stored (name and value).
184 BESDEBUG(PARSER, prolog <<
185 "XML Attribute '" << (
const char *)attributes[index] <<
"': " << xml_attrs[(
const char *)attributes[index]].value << endl);
// Copies namespace declarations from a libxml2 SAX2 namespace array into
// namespace_table.
//
// The array holds two pointers per declaration: namespaces[2*i] is used as the
// map key (a null pointer is stored as the empty string) and
// namespaces[2*i + 1] as the mapped value.
//
// @param namespaces     SAX2 namespace array (2 pointers per declaration)
// @param nb_namespaces  number of declarations in the array
196 void DmrppParserSax2::transfer_xml_ns(
const xmlChar **namespaces,
int nb_namespaces)
199 for (
int i = 0; i < nb_namespaces; ++i) {
200 namespace_table.insert(
201 map<string, string>::value_type(namespaces[i * 2] != 0 ? (
const char *) namespaces[i * 2] :
"",
202 (
const char *) namespaces[i * 2 + 1]));
// Checks that `attr` is present in xml_attrs (the map filled by
// transfer_xml_attrs()). When the attribute is absent, the problem is
// reported through dmr_error() with the attribute name in the message.
//
// @param attr  name of the attribute that must be present
213 bool DmrppParserSax2::check_required_attribute(
const string & attr)
215 if (xml_attrs.find(attr) == xml_attrs.end()) {
216 dmr_error(
this,
"Required attribute '%s' not found.", attr.c_str());
// Checks that an attribute called `name` occurs in a libxml2 SAX2 attribute
// array (five pointers per attribute, name at slot 0) and reports a parse
// error through dmr_error() when it does not.
//
// @param name            attribute name that must be present
// @param attributes      SAX2 attribute array
// @param num_attributes  number of attributes in the array
//
// NOTE(review): the comparison is strncmp() over name.length() characters,
// i.e. a prefix match - "name" is satisfied by any attribute whose name
// starts with "name".
233 bool DmrppParserSax2::check_required_attribute(
const string &name,
const xmlChar **attributes,
int num_attributes)
235 unsigned int index = 0;
236 for (
int i = 0; i < num_attributes; ++i, index += 5) {
237 if (strncmp(name.c_str(), (
const char *)attributes[index], name.length()) == 0) {
242 dmr_error(
this,
"Required attribute '%s' not found.", name.c_str());
// Returns true when `attr` is a key of xml_attrs (the map filled by
// transfer_xml_attrs()), false otherwise. Unlike check_required_attribute(),
// no error is reported when the attribute is absent.
253 bool DmrppParserSax2::check_attribute(
const string & attr)
255 return (xml_attrs.find(attr) != xml_attrs.end());
// Tests whether an attribute called `name` occurs in a libxml2 SAX2 attribute
// array (five pointers per attribute, name at slot 0).
//
// @param name            attribute name to look for
// @param attributes      SAX2 attribute array
// @param num_attributes  number of attributes in the array
//
// NOTE(review): strncmp() over name.length() characters makes this a prefix
// match, not an exact-name match.
269 bool DmrppParserSax2::check_attribute(
const string &name,
const xmlChar **attributes,
int num_attributes)
271 unsigned int index = 0;
272 for (
int i = 0; i < num_attributes; ++i, index += 5) {
273 if (strncmp(name.c_str(), (
const char *)attributes[index], name.length()) == 0) {
// Handles the start of a <Dimension> definition element.
//
// Returns false immediately when `name` is not "Dimension" so the caller can
// try other element handlers. Otherwise the 'name' and 'size' attributes are
// validated and copied into the dimension definition returned by dim_def().
//
// @param name           local name of the element being opened
// @param attrs          SAX2 attribute array of the element
// @param nb_attributes  number of attributes in `attrs`
280 bool DmrppParserSax2::process_dimension_def(
const char *name,
const xmlChar **attrs,
int nb_attributes)
282 if (is_not(name,
"Dimension"))
return false;
285 transfer_xml_attrs(attrs, nb_attributes);
// NOTE(review): a combined name/size check is followed by two separate
// checks for the same attributes. They look like alternative versions of the
// same validation; confirm which one is actually compiled.
289 if (!(check_required_attribute(
"name", attrs, nb_attributes) && check_required_attribute(
"size", attrs, nb_attributes))) {
290 dmr_error(
this,
"The required attribute 'name' or 'size' was missing from a Dimension element.");
295 if (!check_required_attribute(
"name", attrs, nb_attributes)) {
296 dmr_error(
this,
"The required attribute 'name' was missing from a Dimension element.");
300 if (!check_required_attribute(
"size", attrs, nb_attributes)) {
301 dmr_error(
this,
"The required attribute 'size' was missing from a Dimension element.");
// Store the validated attributes in the dimension under construction.
306 dim_def()->set_name(get_attribute_val(
"name", attrs, nb_attributes));
308 dim_def()->set_size(get_attribute_val(
"size", attrs, nb_attributes));
// An error object `e` raised while setting the values is turned into a
// parser error carrying its message.
311 dmr_error(
this, e.get_error_message().c_str());
// Handles the start of a <Dim> element inside a variable.
//
// Returns false immediately when `name` is not "Dim". A Dim must carry exactly
// one of 'size' (anonymous dimension) or 'name' (reference to a shared
// Dimension). The variable on top of the BaseType stack is turned into an
// Array if it is not one already, and the dimension is added to that Array.
//
// @param name           local name of the element being opened
// @param attrs          SAX2 attribute array of the element
// @param nb_attributes  number of attributes in `attrs`
// @throws BESInternalError when a named Dimension cannot be found
335 bool DmrppParserSax2::process_dimension(
const char *name,
const xmlChar **attrs,
int nb_attributes)
337 if (is_not(name,
"Dim"))
return false;
340 transfer_xml_attrs(attrs, nb_attributes);
// NOTE(review): the both-present / neither-present validation appears twice
// (once inline, once through has_size/has_name). They look like alternative
// versions of the same checks; confirm which one is actually compiled.
343 if (check_attribute(
"size", attrs, nb_attributes) && check_attribute(
"name", attrs, nb_attributes)) {
344 dmr_error(
this,
"Only one of 'size' and 'name' are allowed in a Dim element, but both were used.");
347 if (!(check_attribute(
"size", attrs, nb_attributes) || check_attribute(
"name", attrs, nb_attributes))) {
348 dmr_error(
this,
"Either 'size' or 'name' must be used in a Dim element.");
352 bool has_size = check_attribute(
"size", attrs, nb_attributes);
353 bool has_name = check_attribute(
"name", attrs, nb_attributes);
354 if (has_size && has_name) {
355 dmr_error(
this,
"Only one of 'size' and 'name' are allowed in a Dim element, but both were used.");
358 if (!has_size && !has_name) {
359 dmr_error(
this,
"Either 'size' or 'name' must be used in a Dim element.");
// First Dim seen for this variable: wrap the scalar on top of the stack in a
// new Array with the same name, and move the scalar's attributes to the Array.
364 if (!top_basetype()->is_vector_type()) {
366 BaseType *b = top_basetype();
369 Array *a =
static_cast<Array*
>(dmr()->factory()->NewVariable(dods_array_c, b->name()));
370 a->set_is_dap4(
true);
371 a->add_var_nocopy(b);
372 a->set_attributes_nocopy(b->attributes());
// Clear the scalar's attribute pointer now that the Array holds it.
376 b->set_attributes_nocopy(0);
381 assert(top_basetype()->is_vector_type());
383 Array *a =
static_cast<Array*
>(top_basetype());
// Anonymous dimension: 'size' gives the extent directly.
// NOTE(review): std::stoi() parses into an int, so a size above INT_MAX
// throws std::out_of_range and a negative value would wrap when converted to
// size_t. A 64-bit unsigned parse may be more appropriate - confirm.
385 size_t dim_size = stoi(get_attribute_val(
"size", attrs, nb_attributes));
386 BESDEBUG(PARSER, prolog <<
"Processing nameless Dim of size: " << dim_size << endl);
387 a->append_dim(dim_size);
// Named dimension: resolve the reference to a D4Dimension, searching either
// from the root group or from the current group.
391 string name = get_attribute_val(
"name", attrs, nb_attributes);
392 BESDEBUG(PARSER, prolog <<
"Processing Dim with named Dimension reference: " << name << endl);
394 D4Dimension *dim = 0;
396 dim = dmr()->root()->find_dim(name);
399 dim = top_group()->find_dim(name);
402 throw BESInternalError(
"The dimension '" + name +
"' was not found while parsing the variable '" + a->name() +
"'.",__FILE__,__LINE__);
// Handles the start of a dmrpp:compact element.
//
// When `name` is "compact", the variable on top of the BaseType stack is
// looked up, cast to DmrppCommon and flagged as compact via set_compact(true).
//
// @param name  local name of the element being opened
// @throws BESInternalError when there is no parent variable, or when the cast
//         to DmrppCommon is reported as failed
410 bool DmrppParserSax2::process_dmrpp_compact_start(
const char *name){
411 if ( strcmp(name,
"compact") == 0) {
412 BESDEBUG(PARSER, prolog <<
"DMR++ compact element. localname: " << name << endl);
413 BaseType *bt = top_basetype();
414 if (!bt)
throw BESInternalError(
"Could not locate parent BaseType during parse operation.", __FILE__, __LINE__);
// dynamic_cast because not every BaseType is necessarily a DmrppCommon.
415 DmrppCommon *dc =
dynamic_cast<DmrppCommon*
>(bt);
417 throw BESInternalError(
"Could not cast BaseType to DmrppType in the drmpp handler.", __FILE__, __LINE__);
418 dc->set_compact(
true);
// Handles the end of a dmrpp:compact element.
//
// The character data collected for the element (char_data) is base64-decoded
// and loaded into the array variable on top of the BaseType stack, which is
// then marked as already read.
//
// @param localname  local name of the element being closed
// @throws BESInternalError when there is no parent variable, the parent is
//         not an array, the cast to DmrppCommon is reported as failed, the
//         array's template is itself an Array, or the element type is
//         unsupported
//
// NOTE(review): the first debug message says "BEGIN DMR++ compact element"
// although this is the end-element handler - presumably it marks the start of
// this function; confirm the wording is intended.
427 void DmrppParserSax2::process_dmrpp_compact_end(
const char *localname)
429 BESDEBUG(PARSER, prolog <<
"BEGIN DMR++ compact element. localname: " << localname << endl);
430 if (is_not(localname,
"compact"))
433 BaseType *target = top_basetype();
435 throw BESInternalError(
"Could not locate parent BaseType during parse operation.", __FILE__, __LINE__);
436 BESDEBUG(PARSER, prolog <<
"BaseType: " << target->type_name() <<
" " << target->name() << endl);
// Compact storage is only accepted on array variables.
438 if (target->type() != dods_array_c)
439 throw BESInternalError(
"The dmrpp::compact element must be the child of an array variable",__FILE__,__LINE__);
441 DmrppCommon *dc =
dynamic_cast<DmrppCommon*
>(target);
443 throw BESInternalError(
"Could not cast BaseType to DmrppType in the drmpp handler.", __FILE__, __LINE__);
445 dc->set_compact(
true);
// The element text is the base64 encoding of the variable's values.
449 std::string data(char_data);
450 BESDEBUG(PARSER, prolog <<
"Read compact element text. size: " << data.size() <<
" length: " << data.length() <<
" value: '" << data <<
"'" << endl);
452 std::vector <u_int8_t> decoded = base64::Base64::decode(data);
// Dispatch on the element (template) type of the array.
454 switch (target->var()->type()) {
456 throw BESInternalError(
"Parser state has been corrupted. An Array may not be the template for an Array.", __FILE__, __LINE__);
// Hand the decoded bytes to the variable and mark it as read.
// NOTE(review): &decoded[0] is undefined behaviour when `decoded` is empty
// (e.g. an empty compact element); decoded.data() would be safe - confirm
// whether empty payloads can occur.
474 target->val2buf(
reinterpret_cast<void *
>(&decoded[0]));
475 target->set_read_p(
true);
// String case: the decoded bytes are reinterpreted as one string.
481 std::string str(decoded.begin(), decoded.end());
482 DmrppArray *st =
dynamic_cast<DmrppArray *
>(target);
485 msg << prolog <<
"The target BaseType MUST be an array. and it's a " << target->type_name();
486 BESDEBUG(MODULE, msg.str() << endl);
490 st->set_read_p(
true);
495 throw BESInternalError(
"Unsupported COMPACT storage variable type in the drmpp handler.", __FILE__, __LINE__);
500 BESDEBUG(PARSER, prolog <<
"END" << endl);
// Handles the start of a <Map> element inside a variable.
//
// Returns false immediately when `name` is not "Map". The 'name' attribute is
// mandatory. The variable on top of the BaseType stack is turned into an
// Array if it is not one already, the map's source Array is looked up, and a
// new D4Map is added to the Array's maps.
//
// @param name           local name of the element being opened
// @param attrs          SAX2 attribute array of the element
// @param nb_attributes  number of attributes in `attrs`
// @throws BESInternalError when the map source is not found and d_strict is set
503 bool DmrppParserSax2::process_map(
const char *name,
const xmlChar **attrs,
int nb_attributes)
505 if (is_not(name,
"Map"))
return false;
508 transfer_xml_attrs(attrs, nb_attributes);
511 if (!check_attribute(
"name", attrs, nb_attributes)) {
512 dmr_error(
this,
"The 'name' attribute must be used in a Map element.");
// Same promotion as for Dim: wrap a scalar in a new Array of the same name
// and move the scalar's attributes to the Array.
516 if (!top_basetype()->is_vector_type()) {
518 BaseType *b = top_basetype();
521 Array *a =
static_cast<Array*
>(dmr()->factory()->NewVariable(dods_array_c, b->name()));
522 a->set_is_dap4(
true);
523 a->add_var_nocopy(b);
524 a->set_attributes_nocopy(b->attributes());
528 b->set_attributes_nocopy(0);
533 assert(top_basetype()->is_vector_type());
535 Array *a =
static_cast<Array*
>(top_basetype());
// A map name that does not begin with '/' is made absolute by prefixing the
// current group's fully qualified name.
// NOTE(review): .at(0) throws std::out_of_range when the 'name' attribute is
// an empty string - confirm whether that input needs a parser error instead.
537 string map_name = get_attribute_val(
"name", attrs, nb_attributes);
538 if (get_attribute_val(
"name", attrs, nb_attributes).at(0) !=
'/') map_name = top_group()->FQN() + map_name;
540 Array *map_source = 0;
// Absolute names are resolved from the root group, others from the current one.
542 if (map_name[0] ==
'/')
543 map_source = dmr()->root()->find_map_source(map_name);
546 map_source = top_group()->find_map_source(map_name);
// A missing source is fatal only in strict mode; otherwise the D4Map is
// created with a null source pointer.
554 if (!map_source && d_strict)
555 throw BESInternalError(
"The Map '" + map_name +
"' was not found while parsing the variable '" + a->name() +
"'.",__FILE__,__LINE__);
557 a->maps()->add_map(
new D4Map(map_name, map_source));
// Handles the start of a <Group> element.
//
// Returns false immediately when `name` is not "Group". A new group variable
// is created through the DMR's factory using the 'name' attribute, linked to
// the group currently on top of the group stack, and its attribute table is
// pushed so nested <Attribute> elements attach to it.
//
// @param name           local name of the element being opened
// @param attrs          SAX2 attribute array of the element
// @param nb_attributes  number of attributes in `attrs`
562 bool DmrppParserSax2::process_group(
const char *name,
const xmlChar **attrs,
int nb_attributes)
564 if (is_not(name,
"Group"))
return false;
567 transfer_xml_attrs(attrs, nb_attributes);
570 if (!check_required_attribute(
"name", attrs, nb_attributes)) {
571 dmr_error(
this,
"The required attribute 'name' was missing from a Group element.");
575 BaseType *btp = dmr()->factory()->NewVariable(dods_group_c, get_attribute_val(
"name", attrs, nb_attributes));
577 dmr_fatal_error(
this,
"Could not instantiate the Group '%s'.", get_attribute_val(
"name", attrs, nb_attributes).c_str());
// The factory was asked for dods_group_c, so the result is treated as a D4Group.
581 D4Group *grp =
static_cast<D4Group*
>(btp);
585 grp->set_is_dap4(
true);
// Attach the new group to the enclosing group.
588 D4Group *parent = top_group();
590 dmr_fatal_error(
this,
"No Group on the Group stack.");
594 grp->set_parent(parent);
595 parent->add_group_nocopy(grp);
598 push_attributes(grp->attributes());
// Handles the start of an <Attribute> element.
//
// Returns false immediately when `name` is not "Attribute". Both 'name' and
// 'type' are required. The 'type' value selects one of three paths:
//   - "Container": a container attribute is created, added to the attribute
//     table on top of the stack, and its own table is pushed;
//   - "OtherXML":  the parser enters inside_other_xml_attribute and remembers
//     the attribute's name and type;
//   - otherwise:   the parser enters inside_attribute and remembers the
//     attribute's name and type.
//
// @param name           local name of the element being opened
// @param attrs          SAX2 attribute array of the element
// @param nb_attributes  number of attributes in `attrs`
608 inline bool DmrppParserSax2::process_attribute(
const char *name,
const xmlChar **attrs,
int nb_attributes)
610 if (is_not(name,
"Attribute"))
return false;
614 transfer_xml_attrs(attrs, nb_attributes);
618 if (!(check_required_attribute(
string(
"name"), attrs, nb_attributes) && check_required_attribute(
string(
"type"), attrs, nb_attributes))) {
619 dmr_error(
this,
"The required attribute 'name' or 'type' was missing from an Attribute element.");
// Container attribute: nested attributes go into the child's own table.
623 if (get_attribute_val(
"type", attrs, nb_attributes) ==
"Container") {
624 push_state(inside_attribute_container);
626 BESDEBUG(PARSER, prolog <<
"Pushing attribute container " << get_attribute_val(
"name", attrs, nb_attributes) << endl);
627 D4Attribute *child =
new D4Attribute(get_attribute_val(
"name", attrs, nb_attributes), attr_container_c);
629 D4Attributes *tos = top_attributes();
633 dmr_fatal_error(
this,
"Expected an Attribute container on the top of the attribute stack.");
637 tos->add_attribute_nocopy(child);
638 push_attributes(child->attributes());
// OtherXML attribute: remember name/type for use when the element ends.
640 else if (get_attribute_val(
"type", attrs, nb_attributes) ==
"OtherXML") {
641 push_state(inside_other_xml_attribute);
643 dods_attr_name = get_attribute_val(
"name", attrs, nb_attributes);
644 dods_attr_type = get_attribute_val(
"type", attrs, nb_attributes);
// Ordinary attribute: remember name/type for the <Value> elements that follow.
647 push_state(inside_attribute);
649 dods_attr_name = get_attribute_val(
"name", attrs, nb_attributes);
650 dods_attr_type = get_attribute_val(
"type", attrs, nb_attributes);
// Handles the start of an <Enumeration> definition element.
//
// Returns false immediately when `name` is not "Enumeration". Requires the
// 'name' and 'basetype' attributes; 'basetype' must name an integer type.
// The definition returned by enum_def() receives the (possibly qualified)
// name and the base type.
//
// @param name           local name of the element being opened
// @param attrs          SAX2 attribute array of the element
// @param nb_attributes  number of attributes in `attrs`
661 inline bool DmrppParserSax2::process_enum_def(
const char *name,
const xmlChar **attrs,
int nb_attributes)
663 if (is_not(name,
"Enumeration"))
return false;
666 transfer_xml_attrs(attrs, nb_attributes);
669 if (!(check_required_attribute(
"name", attrs, nb_attributes) && check_required_attribute(
"basetype", attrs, nb_attributes))) {
670 dmr_error(
this,
"The required attribute 'name' or 'basetype' was missing from an Enumeration element.");
// Only integer base types are accepted for an Enumeration.
674 Type t = get_type(get_attribute_val(
"basetype", attrs, nb_attributes).c_str());
675 if (!is_integer_type(t)) {
676 dmr_error(
this,
"The Enumeration '%s' must have an integer type, instead the type '%s' was used.",
677 get_attribute_val(
"name", attrs, nb_attributes).c_str(), get_attribute_val(
"basetype", attrs, nb_attributes).c_str());
682 string enum_def_path = get_attribute_val(
"name", attrs, nb_attributes);
// A name that does not begin with '/' is prefixed with the current group's
// fully qualified name. This test reads the xml_attrs map filled by
// transfer_xml_attrs() above, whereas the value itself came from
// get_attribute_val().
685 if (xml_attrs[
"name"].value[0] !=
'/')
686 enum_def_path = top_group()->FQN() + enum_def_path;
688 enum_def()->set_name(enum_def_path);
689 enum_def()->set_type(t);
// Handles the start of an <EnumConst> element inside an Enumeration.
//
// Returns false immediately when `name` is not "EnumConst". Requires 'name'
// and 'value'; 'value' is parsed with a stream extraction and must be
// accepted by is_valid_enum_value() of the enumeration under construction
// before the (name, value) pair is added to it.
//
// @param name           local name of the element being opened
// @param attrs          SAX2 attribute array of the element
// @param nb_attributes  number of attributes in `attrs`
694 inline bool DmrppParserSax2::process_enum_const(
const char *name,
const xmlChar **attrs,
int nb_attributes)
696 if (is_not(name,
"EnumConst"))
return false;
700 transfer_xml_attrs(attrs, nb_attributes);
703 if (!(check_required_attribute(
"name", attrs, nb_attributes) && check_required_attribute(
"value", attrs, nb_attributes))) {
704 dmr_error(
this,
"The required attribute 'name' or 'value' was missing from an EnumConst element.");
// Parse the textual value, skipping leading whitespace.
708 istringstream iss(get_attribute_val(
"value", attrs, nb_attributes));
710 iss >> skipws >> value;
711 if (iss.fail() || iss.bad()) {
712 dmr_error(
this,
"Expected an integer value for an Enumeration constant, got '%s' instead.",
713 get_attribute_val(
"value", attrs, nb_attributes).c_str());
// The parsed value must be representable in the enumeration's base type.
715 else if (!enum_def()->is_valid_enum_value(value)) {
716 dmr_error(
this,
"In an Enumeration constant, the value '%s' cannot fit in a variable of type '%s'.",
717 get_attribute_val(
"value", attrs, nb_attributes).c_str(), D4type_name(d_enum_def->type()).c_str());
721 enum_def()->add_value(get_attribute_val(
"name", attrs, nb_attributes), value);
// Tries to interpret the element `name` as a variable declaration.
//
// The element name is mapped to a libdap Type with get_type(). Simple types
// are created with the inside_simple_type state; Structure and Sequence are
// created with the inside_constructor state. Creation itself is delegated to
// process_variable_helper().
//
// @param name           local name of the element being opened
// @param attrs          SAX2 attribute array of the element
// @param nb_attributes  number of attributes in `attrs`
732 inline bool DmrppParserSax2::process_variable(
const char *name,
const xmlChar **attrs,
int nb_attributes)
734 Type t = get_type(name);
735 if (is_simple_type(t)) {
736 process_variable_helper(t, inside_simple_type, attrs, nb_attributes);
741 case dods_structure_c:
742 process_variable_helper(t, inside_constructor, attrs, nb_attributes);
745 case dods_sequence_c:
746 process_variable_helper(t, inside_constructor, attrs, nb_attributes);
// Creates a variable of type `t` for the element being opened.
//
// The variable is built by the DMR's factory from the required 'name'
// attribute. For Enum variables the 'enum' attribute is resolved to an
// enumeration definition (from the root group when the path starts with '/',
// otherwise from the current group) and attached to the variable. The new
// variable is flagged as DAP4 and its attribute table is pushed.
//
// @param t              libdap type of the variable to create
// @param s              parser state associated with the new variable
// @param attrs          SAX2 attribute array of the element
// @param nb_attributes  number of attributes in `attrs`
762 void DmrppParserSax2::process_variable_helper(
Type t, ParseState s,
const xmlChar **attrs,
int nb_attributes)
765 transfer_xml_attrs(attrs, nb_attributes);
768 if (check_required_attribute(
"name", attrs, nb_attributes)) {
769 BaseType *btp = dmr()->factory()->NewVariable(t, get_attribute_val(
"name", attrs, nb_attributes));
771 dmr_fatal_error(
this,
"Could not instantiate the variable '%s'.", xml_attrs[
"name"].value.c_str());
// Enum variables must reference an existing Enumeration definition.
775 if ((t == dods_enum_c) && check_required_attribute(
"enum", attrs, nb_attributes)) {
776 D4EnumDef *enum_def = 0;
777 string enum_path = get_attribute_val(
"enum", attrs, nb_attributes);
778 if (enum_path[0] ==
'/')
779 enum_def = dmr()->root()->find_enum_def(enum_path);
781 enum_def = top_group()->find_enum_def(enum_path);
783 if (!enum_def) dmr_fatal_error(
this,
"Could not find the Enumeration definition '%s'.", enum_path.c_str());
// t == dods_enum_c was checked above, so btp is treated as a D4Enum.
785 static_cast<D4Enum*
>(btp)->set_enumeration(enum_def);
788 btp->set_is_dap4(
true);
791 push_attributes(btp->attributes());
// SAX callback invoked at the start of the document.
//
// Resets the parser's error message and character-data buffers and pushes the
// root group's attribute table so top-level <Attribute> elements attach to it.
//
// @param p  opaque user-data pointer supplied by libxml2; used here as
//           `parser`, the DmrppParserSax2 instance
807 void DmrppParserSax2::dmr_start_document(
void * p)
810 parser->error_msg =
"";
811 parser->char_data =
"";
818 parser->push_attributes(parser->dmr()->root()->attributes());
820 BESDEBUG(PARSER, prolog <<
"Parser start state: " << states[parser->get_state()] << endl);
// SAX callback invoked at the end of the document.
//
// Verifies that parsing finished in the parser_end state and that the
// BaseType stack is empty while the group stack is not; violations are
// reported through dmr_error(). Nothing further is done once the parser is in
// an error state. Finally the attribute table pushed at document start is
// popped.
//
// @param p  opaque user-data pointer supplied by libxml2; used here as
//           `parser`, the DmrppParserSax2 instance
825 void DmrppParserSax2::dmr_end_document(
void * p)
829 BESDEBUG(PARSER, prolog <<
"Parser end state: " << states[parser->get_state()] << endl);
831 if (parser->get_state() != parser_end)
832 DmrppParserSax2::dmr_error(parser,
"The document contained unbalanced tags.");
// Stop here if an error (including the one just above) has been recorded.
836 if (parser->get_state() == parser_error || parser->get_state() == parser_fatal_error)
return;
838 if (!parser->empty_basetype() || parser->empty_group())
839 DmrppParserSax2::dmr_error(parser,
840 "The document did not contain a valid root Group or contained unbalanced tags.");
// Diagnostic dump of the remaining top group to the stream `os`.
844 *os << prolog <<
"parser->top_group() BEGIN " << endl;
845 parser->top_group()->dump(*os);
846 *os << endl << prolog <<
"parser->top_group() END " << endl;
850 parser->pop_attributes();
// SAX2 callback invoked for every start tag.
//
// Two phases are visible:
//   1. Namespace triage (skipped when the parser is already in parser_error):
//      elements in the DMR++ namespace push one of the inside_dmrpp_* states,
//      and elements that are in neither the DMR++ nor the DAP4 namespace push
//      not_dap4_element.
//   2. A switch on the current parser state that validates the element and
//      delegates to the process_*() helpers, pushing the matching state.
//
// @param p              opaque user-data pointer; used here as `parser`
// @param l              local name of the element
// @param prefix         namespace prefix of the element, may be null
// @param URI            namespace URI of the element, may be null
// @param nb_namespaces  number of namespace declarations on the element
// @param namespaces     namespace array (2 pointers per declaration)
// @param nb_attributes  number of attributes on the element
// @param attributes     attribute array (5 pointers per attribute)
// @throws BESInternalError from the dmrpp-object branch when there is no
//         parent variable or the cast to DmrppCommon is reported as failed
853 void DmrppParserSax2::dmr_start_element(
void *p,
const xmlChar *l,
const xmlChar *prefix,
const xmlChar *URI,
854 int nb_namespaces,
const xmlChar **namespaces,
int nb_attributes,
int ,
const xmlChar **attributes)
857 const char *localname =
reinterpret_cast<const char *
>(l);
// A missing namespace URI is represented by the literal string "null".
859 string this_element_ns_name(URI ? (
char *) URI :
"null");
// ---- Phase 1: choose a state from the element's namespace ----
861 if (parser->get_state() != parser_error) {
862 string dap4_ns_name = DapXmlNamspaces::getDapNamespaceString(DAP_4_0);
863 BESDEBUG(PARSER, prolog <<
"dap4_ns_name: " << dap4_ns_name << endl);
865 if (this_element_ns_name == dmrpp_namespace) {
866 if (strcmp(localname,
"chunkDimensionSizes") == 0) {
867 BESDEBUG(PARSER, prolog <<
"Found dmrpp:chunkDimensionSizes element. Pushing state." << endl);
868 parser->push_state(inside_dmrpp_chunkDimensionSizes_element);
870 else if (strcmp(localname,
"compact") == 0) {
871 BESDEBUG(PARSER, prolog <<
"Found dmrpp:compact element. Pushing state." << endl);
872 parser->push_state(inside_dmrpp_compact_element);
876 prolog <<
"Start of element in dmrpp namespace: " << localname <<
" detected." << endl);
877 parser->push_state(inside_dmrpp_object);
// Neither DMR++ nor DAP4: mark the element so it can be skipped.
880 else if (this_element_ns_name != dap4_ns_name) {
881 BESDEBUG(PARSER, prolog <<
"Start of non DAP4 element: " << localname <<
" detected." << endl);
882 parser->push_state(not_dap4_element);
886 BESDEBUG(PARSER, prolog <<
"Start element " << localname <<
" prefix: " << (prefix ? (
char *) prefix :
"null") <<
" ns: "
887 << this_element_ns_name <<
" (state: " << states[parser->get_state()] <<
")" << endl);
// ---- Phase 2: dispatch on the current parser state ----
889 switch (parser->get_state()) {
// Document root: must be <Dataset>; its attributes configure the DMR.
891 if (is_not(localname,
"Dataset"))
892 DmrppParserSax2::dmr_error(parser,
"Expected DMR to start with a Dataset element; found '%s' instead.",
895 parser->root_ns = URI ? (
const char *) URI :
"";
898 parser->transfer_xml_attrs(attributes, nb_attributes);
901 if (parser->check_required_attribute(
string(
"name"), attributes, nb_attributes)) parser->dmr()->set_name(parser->get_attribute_val(
"name", attributes, nb_attributes));
903 if (parser->check_attribute(
"dapVersion", attributes, nb_attributes))
904 parser->dmr()->set_dap_version(parser->get_attribute_val(
"dapVersion", attributes, nb_attributes));
906 if (parser->check_attribute(
"dmrVersion", attributes, nb_attributes))
907 parser->dmr()->set_dmr_version(parser->get_attribute_val(
"dmrVersion", attributes, nb_attributes));
909 if (parser->check_attribute(
"base", attributes, nb_attributes)) {
910 parser->dmr()->set_request_xml_base(parser->get_attribute_val(
"base", attributes, nb_attributes));
912 BESDEBUG(PARSER, prolog <<
"Dataset xml:base is set to '" << parser->dmr()->request_xml_base() <<
"'" << endl);
// Optional dataset-level href: remembered as the default data location for
// chunks, and passed to the effective-URL cache.
914 if (parser->check_attribute(
"href", attributes, nb_attributes)) {
915 parser->dmrpp_dataset_href = parser->get_attribute_val(
"href", attributes, nb_attributes);
916 BESDEBUG(PARSER, prolog <<
"Attempting to locate and cache the effective URL for Dataset URL: " << parser->dmrpp_dataset_href << endl);
// NOTE(review): effective_url is only logged in the lines shown here - the
// call presumably exists to populate the cache; confirm.
917 string effective_url = EffectiveUrlCache::TheCache()->get_effective_url(parser->dmrpp_dataset_href);
918 BESDEBUG(PARSER, prolog <<
"EffectiveUrlCache::get_effective_url() returned: " << effective_url << endl);
920 BESDEBUG(PARSER, prolog <<
"Dataset dmrpp:href is set to '" << parser->dmrpp_dataset_href <<
"'" << endl);
922 if (!parser->root_ns.empty()) parser->dmr()->set_namespace(parser->root_ns);
// The root group becomes the current group for everything that follows.
925 parser->push_group(parser->dmr()->root());
927 parser->push_state(inside_dataset);
// Inside Dataset/Group: try each kind of child element in turn.
936 if (parser->process_enum_def(localname, attributes, nb_attributes))
937 parser->push_state(inside_enum_def);
938 else if (parser->process_dimension_def(localname, attributes, nb_attributes))
939 parser->push_state(inside_dim_def);
940 else if (parser->process_group(localname, attributes, nb_attributes))
941 parser->push_state(inside_group);
942 else if (parser->process_variable(localname, attributes, nb_attributes))
946 else if (parser->process_attribute(localname, attributes, nb_attributes))
951 DmrppParserSax2::dmr_error(parser,
952 "Expected an Attribute, Enumeration, Dimension, Group or variable element; found '%s' instead.",
956 case inside_attribute_container:
957 if (parser->process_attribute(localname, attributes, nb_attributes))
960 DmrppParserSax2::dmr_error(parser,
"Expected an Attribute element; found '%s' instead.", localname);
963 case inside_attribute:
964 if (parser->process_attribute(localname, attributes, nb_attributes))
966 else if (strcmp(localname,
"Value") == 0)
967 parser->push_state(inside_attribute_value);
969 dmr_error(parser,
"Expected an 'Attribute' or 'Value' element; found '%s' instead.", localname);
972 case inside_attribute_value:
// OtherXML: the start tag is re-serialized as text into other_xml, including
// its prefix, namespace declarations and attributes.
976 case inside_other_xml_attribute:
977 parser->other_xml_depth++;
980 parser->other_xml.append(
"<");
982 parser->other_xml.append((
const char *) prefix);
983 parser->other_xml.append(
":");
985 parser->other_xml.append(localname);
987 if (nb_namespaces != 0) {
988 parser->transfer_xml_ns(namespaces, nb_namespaces);
990 for (map<string, string>::iterator i = parser->namespace_table.begin(); i != parser->namespace_table.end();
992 parser->other_xml.append(
" xmlns");
993 if (!i->first.empty()) {
994 parser->other_xml.append(
":");
995 parser->other_xml.append(i->first);
997 parser->other_xml.append(
"=\"");
998 parser->other_xml.append(i->second);
999 parser->other_xml.append(
"\"");
1003 if (nb_attributes != 0) {
1005 parser->transfer_xml_attrs(attributes, nb_attributes);
1007 for (XMLAttrMap::iterator i = parser->xml_attr_begin(); i != parser->xml_attr_end(); ++i) {
1008 parser->other_xml.append(
" ");
1009 if (!i->second.prefix.empty()) {
1010 parser->other_xml.append(i->second.prefix);
1011 parser->other_xml.append(
":");
1013 parser->other_xml.append(i->first);
1014 parser->other_xml.append(
"=\"");
1015 parser->other_xml.append(i->second.value);
1016 parser->other_xml.append(
"\"");
1020 parser->other_xml.append(
">");
1023 case inside_enum_def:
1025 if (parser->process_enum_const(localname, attributes, nb_attributes))
1026 parser->push_state(inside_enum_const);
1028 dmr_error(parser,
"Expected an 'EnumConst' element; found '%s' instead.", localname);
1031 case inside_enum_const:
1035 case inside_dim_def:
// Inside a simple-type variable: Attribute, Dim or Map children are allowed.
1047 case inside_simple_type:
1048 if (parser->process_attribute(localname, attributes, nb_attributes))
1050 else if (parser->process_dimension(localname, attributes, nb_attributes))
1051 parser->push_state(inside_dim);
1052 else if (parser->process_map(localname, attributes, nb_attributes))
1053 parser->push_state(inside_map);
1055 dmr_error(parser,
"Expected an 'Attribute', 'Dim' or 'Map' element; found '%s' instead.", localname);
// Inside a Structure/Sequence: nested variables are allowed as well.
1058 case inside_constructor:
1059 if (parser->process_variable(localname, attributes, nb_attributes))
1063 else if (parser->process_attribute(localname, attributes, nb_attributes))
1065 else if (parser->process_dimension(localname, attributes, nb_attributes))
1066 parser->push_state(inside_dim);
1067 else if (parser->process_map(localname, attributes, nb_attributes))
1068 parser->push_state(inside_map);
1070 DmrppParserSax2::dmr_error(parser,
1071 "Expected an Attribute, Dim, Map or variable element; found '%s' instead.", localname);
1074 case not_dap4_element:
1075 BESDEBUG(PARSER, prolog <<
"SKIPPING unexpected element. localname: " << localname <<
"namespace: "
1076 << this_element_ns_name << endl);
1079 case inside_dmrpp_compact_element:
1080 if (parser->process_dmrpp_compact_start(localname)) {
1081 BESDEBUG(PARSER, prolog <<
"Call to parser->process_dmrpp_compact_start() completed." << endl);
// Generic DMR++ element (dmrpp:chunks, dmrpp:chunk): the information is
// attached to the variable on top of the BaseType stack via DmrppCommon.
1085 case inside_dmrpp_object: {
1086 BESDEBUG(PARSER, prolog <<
"Inside dmrpp namespaced element. localname: " << localname << endl);
1087 assert(this_element_ns_name == dmrpp_namespace);
1090 parser->transfer_xml_attrs(attributes, nb_attributes);
1093 BaseType *bt = parser->top_basetype();
// NOTE(review): the message below reads "Could locate ..." while the same
// check elsewhere in this file says "Could not locate ..." - likely a typo.
1094 if (!bt)
throw BESInternalError(
"Could locate parent BaseType during parse operation.", __FILE__, __LINE__);
1096 DmrppCommon *dc =
dynamic_cast<DmrppCommon*
>(bt);
1098 throw BESInternalError(
"Could not cast BaseType to DmrppType in the drmpp handler.", __FILE__, __LINE__);
// dmrpp:chunks - optional compressionType and byteOrder for the variable.
1101 if (strcmp(localname,
"chunks") == 0) {
1102 BESDEBUG(PARSER, prolog <<
"DMR++ chunks element. localname: " << localname << endl);
1104 if (parser->check_attribute(
"compressionType", attributes, nb_attributes)) {
1105 string compression_type_string(parser->get_attribute_val(
"compressionType", attributes, nb_attributes));
1106 dc->ingest_compression_type(compression_type_string);
1108 BESDEBUG(PARSER, prolog <<
"Processed attribute 'compressionType=\"" <<
1109 compression_type_string <<
"\"'" << endl);
1112 BESDEBUG(PARSER, prolog <<
"There was no 'compressionType' attribute associated with the variable '"
1113 << bt->type_name() <<
" " << bt->name() <<
"'" << endl);
1116 if (parser->check_attribute(
"byteOrder", attributes, nb_attributes)) {
1117 string byte_order_string(parser->get_attribute_val(
"byteOrder", attributes, nb_attributes));
1118 dc->ingest_byte_order(byte_order_string);
1120 BESDEBUG(PARSER, prolog <<
"Processed attribute 'byteOrder=\"" << byte_order_string <<
"\"'" << endl);
1123 BESDEBUG(PARSER, prolog <<
"There was no 'byteOrder' attribute associated with the variable '" << bt->type_name()
1124 <<
" " << bt->name() <<
"'" << endl);
// dmrpp:chunk - one chunk of data: location, offset, size and position.
1128 else if (strcmp(localname,
"chunk") == 0) {
1129 string data_url =
"unknown_data_location";
// A chunk-level href overrides the dataset-level one.
// NOTE(review): href is read twice below, once from xml_attrs and once via
// get_attribute_val(); these look like alternative versions of the same
// step - confirm which one is actually compiled.
1130 if (parser->check_attribute(
"href", attributes, nb_attributes)) {
1132 istringstream data_url_ss(parser->xml_attrs[
"href"].value);
1133 data_url = data_url_ss.str();
1134 BESDEBUG(PARSER, prolog <<
"Processing 'href' value into data_url. href: " <<
1135 data_url_ss.str() << endl);
1138 data_url = parser->get_attribute_val(
"href", attributes, nb_attributes);
1139 BESDEBUG(PARSER, prolog <<
"Processing 'href' value into data_url. href: " << data_url << endl);
// NOTE(review): this message prints parser->dmrpp_dataset_href although the
// URL being resolved on the next statement is data_url - likely should log
// data_url.
1142 BESDEBUG(PARSER, prolog <<
"Attempting to locate and cache the effective URL for Chunk URL: " << parser->dmrpp_dataset_href << endl);
1143 string effective_url = EffectiveUrlCache::TheCache()->get_effective_url(data_url);
1144 BESDEBUG(PARSER, prolog <<
"EffectiveUrlCache::get_effective_url() returned: " << effective_url << endl);
// No chunk-level href: fall back to the Dataset's dmrpp:href.
1147 BESDEBUG(PARSER, prolog <<
"No attribute 'href' located. Trying Dataset/@dmrpp:href..." << endl);
1150 data_url = parser->dmrpp_dataset_href;
1153 BESDEBUG(PARSER, prolog <<
"Processing dmrpp:href into data_url. dmrpp:href='" << data_url <<
"'" << endl);
// A data_url without an http://, https:// or file:// scheme is treated as a
// local path and turned into a file:// URL.
// NOTE(review): the scheme test appears twice (compare()-based and
// find()-based); they look like alternative versions - confirm which one is
// actually compiled.
1158 std::string
http(
"http://");
1159 std::string https(
"https://");
1160 std::string file(
"file://");
1161 if (data_url.compare(0,
http.size(),
http) && data_url.compare(0, https.size(), https)
1162 && data_url.compare(0, file.size(), file))
1165 if (data_url.find(
"http://") != 0 && data_url.find(
"https://") != 0 && data_url.find(
"file://") != 0) {
1166 BESDEBUG(PARSER, prolog <<
"data_url does NOT start with 'http://', 'https://' or 'file://'. "
1167 "Retrieving default catalog root directory" << endl);
1172 BESDEBUG(PARSER, prolog <<
"Not able to find the default catalog." << endl);
1178 BESDEBUG(PARSER, prolog <<
"Found default catalog root_dir: '" << utils->
get_root_dir() <<
"'" << endl);
1181 data_url =
"file://" + data_url;
1185 BESDEBUG(PARSER, prolog <<
"Processed data_url: '" << data_url <<
"'" << endl);
// Chunk geometry; byte order is taken from the variable, as set by an
// enclosing dmrpp:chunks element.
1187 unsigned long long offset = 0;
1188 unsigned long long size = 0;
1189 string chunk_position_in_array(
"");
1190 std::string byte_order = dc->get_byte_order();
1192 if (parser->check_required_attribute(
"offset", attributes, nb_attributes)) {
1193 istringstream offset_ss(parser->get_attribute_val(
"offset", attributes, nb_attributes));
1194 offset_ss >> offset;
1195 BESDEBUG(PARSER, prolog <<
"Processed attribute 'offset=\"" << offset <<
"\"'" << endl);
// NOTE(review): the two error messages below name an 'hdf:byteStream'
// element, and the second names attribute 'size', while the element handled
// here is dmrpp:chunk and the attribute checked is 'nBytes'.
1198 dmr_error(parser,
"The hdf:byteStream element is missing the required attribute 'offset'.");
1201 if (parser->check_required_attribute(
"nBytes", attributes, nb_attributes)) {
1202 istringstream size_ss(parser->get_attribute_val(
"nBytes", attributes, nb_attributes));
1204 BESDEBUG(PARSER, prolog <<
"Processed attribute 'nBytes=\"" << size <<
"\"'" << endl);
1207 dmr_error(parser,
"The hdf:byteStream element is missing the required attribute 'size'.");
// chunkPositionInArray is optional and passed through as text.
1210 if (parser->check_attribute(
"chunkPositionInArray", attributes, nb_attributes)) {
1211 istringstream chunk_position_ss(parser->get_attribute_val(
"chunkPositionInArray", attributes, nb_attributes));
1212 chunk_position_in_array = chunk_position_ss.str();
1213 BESDEBUG(PARSER, prolog <<
"Found attribute 'chunkPositionInArray' value: " << chunk_position_ss.str() << endl);
1216 BESDEBUG(PARSER, prolog <<
"No attribute 'chunkPositionInArray' located" << endl);
// Register the chunk with the variable.
1219 dc->add_chunk(data_url, byte_order, size, offset, chunk_position_in_array);
1224 case inside_dmrpp_chunkDimensionSizes_element:
1228 case parser_unknown:
1230 case parser_fatal_error:
1238 BESDEBUG(PARSER, prolog <<
"Start element exit state: " << states[parser->get_state()] << endl);
1244 void DmrppParserSax2::dmr_end_element(
void *p,
const xmlChar *l,
const xmlChar *prefix,
const xmlChar *URI)
1246 DmrppParserSax2 *parser =
static_cast<DmrppParserSax2*
>(p);
1247 const char *localname = (
const char *) l;
1249 BESDEBUG(PARSER, prolog <<
"End element " << localname <<
" (state " << states[parser->get_state()] <<
")" << endl);
1251 switch (parser->get_state()) {
1253 dmr_fatal_error(parser,
"Unexpected state, inside start state while processing element '%s'.", localname);
1256 case inside_dataset:
1257 if (is_not(localname,
"Dataset"))
1258 DmrppParserSax2::dmr_error(parser,
"Expected an end Dataset tag; found '%s' instead.", localname);
1260 parser->pop_state();
1261 if (parser->get_state() != parser_start)
1262 dmr_fatal_error(parser,
"Unexpected state, expected start state.");
1264 parser->pop_state();
1265 parser->push_state(parser_end);
1269 case inside_group: {
1270 if (is_not(localname,
"Group"))
1271 DmrppParserSax2::dmr_error(parser,
"Expected an end tag for a Group; found '%s' instead.", localname);
1273 if (!parser->empty_basetype() || parser->empty_group())
1274 DmrppParserSax2::dmr_error(parser,
1275 "The document did not contain a valid root Group or contained unbalanced tags.");
1277 parser->pop_group();
1278 parser->pop_state();
1282 case inside_attribute_container:
1283 if (is_not(localname,
"Attribute"))
1284 DmrppParserSax2::dmr_error(parser,
"Expected an end Attribute tag; found '%s' instead.", localname);
1286 parser->pop_state();
1287 parser->pop_attributes();
1290 case inside_attribute:
1291 if (is_not(localname,
"Attribute"))
1292 DmrppParserSax2::dmr_error(parser,
"Expected an end Attribute tag; found '%s' instead.", localname);
1294 parser->pop_state();
1297 case inside_attribute_value: {
1298 if (is_not(localname,
"Value"))
1299 DmrppParserSax2::dmr_error(parser,
"Expected an end value tag; found '%s' instead.", localname);
1301 parser->pop_state();
1306 D4Attributes *attrs = parser->top_attributes();
1307 D4Attribute *attr = attrs->get(parser->dods_attr_name);
1309 attr =
new D4Attribute(parser->dods_attr_name, StringToD4AttributeType(parser->dods_attr_type));
1310 attrs->add_attribute_nocopy(attr);
1312 attr->add_value(parser->char_data);
1314 parser->char_data =
"";
1318 case inside_other_xml_attribute: {
1319 if (strcmp(localname,
"Attribute") == 0 && parser->root_ns == (
const char *) URI) {
1320 parser->pop_state();
1325 D4Attributes *attrs = parser->top_attributes();
1326 D4Attribute *attr = attrs->get(parser->dods_attr_name);
1328 attr =
new D4Attribute(parser->dods_attr_name, StringToD4AttributeType(parser->dods_attr_type));
1329 attrs->add_attribute_nocopy(attr);
1331 attr->add_value(parser->other_xml);
1333 parser->other_xml =
"";
1336 if (parser->other_xml_depth == 0) {
1337 DmrppParserSax2::dmr_error(parser,
"Expected an OtherXML attribute to end! Instead I found '%s'",
1341 parser->other_xml_depth--;
1343 parser->other_xml.append(
"</");
1345 parser->other_xml.append((
const char *) prefix);
1346 parser->other_xml.append(
":");
1348 parser->other_xml.append(localname);
1349 parser->other_xml.append(
">");
1354 case inside_enum_def:
1355 if (is_not(localname,
"Enumeration"))
1356 DmrppParserSax2::dmr_error(parser,
"Expected an end Enumeration tag; found '%s' instead.", localname);
1357 if (!parser->top_group())
1358 DmrppParserSax2::dmr_fatal_error(parser,
1359 "Expected a Group to be the current item, while finishing up an Enumeration.");
1362 parser->top_group()->enum_defs()->add_enum_nocopy(parser->enum_def());
1365 parser->clear_enum_def();
1366 parser->pop_state();
1370 case inside_enum_const:
1371 if (is_not(localname,
"EnumConst"))
1372 DmrppParserSax2::dmr_error(parser,
"Expected an end EnumConst tag; found '%s' instead.", localname);
1374 parser->pop_state();
1377 case inside_dim_def: {
1378 if (is_not(localname,
"Dimension"))
1379 DmrppParserSax2::dmr_error(parser,
"Expected an end Dimension tag; found '%s' instead.", localname);
1381 if (!parser->top_group())
1382 DmrppParserSax2::dmr_error(parser,
1383 "Expected a Group to be the current item, while finishing up an Dimension.");
1385 parser->top_group()->dims()->add_dim_nocopy(parser->dim_def());
1391 parser->clear_dim_def();
1392 parser->pop_state();
1396 case inside_simple_type:
1397 if (is_simple_type(get_type(localname))) {
1398 BaseType *btp = parser->top_basetype();
1399 parser->pop_basetype();
1400 parser->pop_attributes();
1401 BaseType *parent = 0;
1402 if (!parser->empty_basetype())
1403 parent = parser->top_basetype();
1404 else if (!parser->empty_group())
1405 parent = parser->top_group();
1407 dmr_fatal_error(parser,
"Both the Variable and Groups stacks are empty while closing a %s element.",
1410 parser->pop_state();
1413 if (parent->type() == dods_array_c)
1414 static_cast<Array*
>(parent)->prototype()->add_var_nocopy(btp);
1416 parent->add_var_nocopy(btp);
1419 DmrppParserSax2::dmr_error(parser,
"Expected an end tag for a simple type; found '%s' instead.", localname);
1421 parser->pop_state();
1425 if (is_not(localname,
"Dim"))
1426 DmrppParserSax2::dmr_fatal_error(parser,
"Expected an end Dim tag; found '%s' instead.", localname);
1428 parser->pop_state();
1432 if (is_not(localname,
"Map"))
1433 DmrppParserSax2::dmr_fatal_error(parser,
"Expected an end Map tag; found '%s' instead.", localname);
1435 parser->pop_state();
1438 case inside_constructor: {
1439 if (strcmp(localname,
"Structure") != 0 && strcmp(localname,
"Sequence") != 0) {
1440 DmrppParserSax2::dmr_error(parser,
"Expected an end tag for a constructor; found '%s' instead.", localname);
1443 BaseType *btp = parser->top_basetype();
1444 parser->pop_basetype();
1445 parser->pop_attributes();
1446 BaseType *parent = 0;
1447 if (!parser->empty_basetype())
1448 parent = parser->top_basetype();
1449 else if (!parser->empty_group())
1450 parent = parser->top_group();
1452 dmr_fatal_error(parser,
"Both the Variable and Groups stacks are empty while closing a %s element.",
1455 parser->pop_state();
1460 parent->add_var_nocopy(btp);
1461 parser->pop_state();
1465 case not_dap4_element:
1466 BESDEBUG(PARSER, prolog <<
"End of non DAP4 element: " << localname << endl);
1467 parser->pop_state();
1471 case inside_dmrpp_compact_element: {
1472 parser->process_dmrpp_compact_end(localname);
1473 BESDEBUG(PARSER, prolog <<
"End of dmrpp compact element: " << localname << endl);
1474 parser->pop_state();
1479 case inside_dmrpp_object: {
1480 BESDEBUG(PARSER, prolog <<
"End of dmrpp namespace element: " << localname << endl);
1481 parser->pop_state();
1485 case inside_dmrpp_chunkDimensionSizes_element: {
1486 BESDEBUG(PARSER, prolog <<
"End of chunkDimensionSizes element. localname: " << localname << endl);
1488 if (is_not(localname,
"chunkDimensionSizes"))
1489 DmrppParserSax2::dmr_error(parser,
"Expected an end value tag; found '%s' instead.", localname);
1490 DmrppCommon *dc =
dynamic_cast<DmrppCommon*
>(parser->top_basetype());
1492 throw BESInternalError(
"Could not cast BaseType to DmrppType in the drmpp handler.", __FILE__, __LINE__);
1493 string element_text(parser->char_data);
1494 BESDEBUG(PARSER, prolog <<
"chunkDimensionSizes element_text: '" << element_text <<
"'" << endl);
1495 dc->parse_chunk_dimension_sizes(element_text);
1496 parser->char_data =
"";
1497 parser->pop_state();
1501 case parser_unknown:
1502 parser->pop_state();
1506 case parser_fatal_error:
1515 BESDEBUG(PARSER, prolog <<
"End element exit state: " << states[parser->get_state()] <<
1516 " ("<<parser->get_state()<<
")"<< endl);
// Character-data handler (presumably registered as the SAX `characters`
// callback -- confirm). Appends the `len` bytes starting at `ch` to the text
// accumulator that matches the current parse state.
//
// @param p   Untyped pointer; not used directly in the lines shown here.
// @param ch  Start of the character data; it is appended with an explicit
//            length rather than being treated as a C string.
// @param len Number of bytes at `ch` to append.
//
// NOTE(review): `parser` is used below but is not declared in the visible
// lines; presumably it is obtained from `p` -- confirm against the full source.
1522 void DmrppParserSax2::dmr_get_characters(
void * p,
const xmlChar * ch,
int len)
1526 switch (parser->get_state()) {
// These three states all collect their element text in char_data.
1527 case inside_attribute_value:
1528 case inside_dmrpp_chunkDimensionSizes_element:
1529 case inside_dmrpp_compact_element:
1530 parser->char_data.append((
const char *) (ch), len);
// Logs the accumulated size and text so far, not just this call's bytes.
// NOTE(review): the message ends with a quote character that it never opens.
1531 BESDEBUG(PARSER, prolog <<
"Characters[" << parser->char_data.size() <<
"]" << parser->char_data <<
"'" << endl);
// NOTE(review): no `break` is visible before the next label in this listing;
// confirm whether one exists in the full source.
// OtherXML content is accumulated separately, in other_xml.
1534 case inside_other_xml_attribute:
1535 parser->other_xml.append((
const char *) (ch), len);
1536 BESDEBUG(PARSER, prolog <<
"Other XML Characters: '" << parser->other_xml <<
"'" << endl);
// Whitespace handler. While the parser is inside an OtherXML attribute, the
// `len` bytes at `ch` are appended to other_xml; no other state is handled in
// the lines shown here.
//
// @param p   Untyped pointer; not used directly in the lines shown here.
// @param ch  Start of the whitespace run.
// @param len Number of bytes at `ch` to append.
//
// NOTE(review): `parser` is not declared in the visible lines; presumably it
// is obtained from `p` -- confirm against the full source.
1548 void DmrppParserSax2::dmr_ignoreable_whitespace(
void *p,
const xmlChar *ch,
int len)
1552 switch (parser->get_state()) {
1553 case inside_other_xml_attribute:
1554 parser->other_xml.append((
const char *) (ch), len);
// CDATA handler. Inside an OtherXML attribute the block's `len` bytes are
// appended to other_xml; the visible code also reports a parse error stating
// that DAP4 does not allow CDATA blocks.
//
// @param p     Untyped pointer; not used directly in the lines shown here.
// @param value Start of the CDATA content.
// @param len   Number of bytes at `value`.
//
// NOTE(review): `parser` is not declared in the visible lines; presumably it
// is obtained from `p` -- confirm against the full source.
1567 void DmrppParserSax2::dmr_get_cdata(
void *p,
const xmlChar *value,
int len)
1571 switch (parser->get_state()) {
1572 case inside_other_xml_attribute:
1573 parser->other_xml.append((
const char *) (value), len);
1576 case parser_unknown:
// NOTE(review): the statements belonging to parser_unknown and the label that
// guards the error call below are not visible in this listing; presumably the
// error is raised from a default case -- confirm against the full source.
1580 DmrppParserSax2::dmr_error(parser,
"Found a CData block but none are allowed by DAP4.");
// Entity lookup handler. Delegates straight to libxml2's
// xmlGetPredefinedEntity() and returns its result unchanged.
//
// The first argument is unnamed and unused.
// @param name Entity name to look up.
// @return Whatever xmlGetPredefinedEntity() returns for `name`.
1590 xmlEntityPtr DmrppParserSax2::dmr_get_entity(
void *,
const xmlChar * name)
1592 return xmlGetPredefinedEntity(name);
// Records a fatal parse error: pushes the parser_fatal_error state and appends
// a formatted, line-numbered message to the parser's error_msg.
//
// @param p   Untyped pointer; not used directly in the lines shown here.
// @param msg printf-style format string, followed by its arguments.
//
// NOTE(review): `parser`, `args` and `str` are not declared in the visible
// lines, and no va_end(args) is visible either -- confirm all of these against
// the full source.
1605 void DmrppParserSax2::dmr_fatal_error(
void * p,
const char *msg, ...)
1610 parser->push_state(parser_fatal_error);
1612 va_start(args, msg);
// The formatted text is limited to 1024 bytes including the terminator;
// anything longer is truncated by vsnprintf.
1614 vsnprintf(str, 1024, msg, args);
// Line number reported by libxml2 for this parser's context.
1617 int line = xmlSAX2GetLineNumber(parser->context);
// Messages accumulate: each new one starts on its own line after any earlier text.
1619 if (!parser->error_msg.empty()) parser->error_msg +=
"\n";
1620 parser->error_msg +=
"At line " + long_to_string(line) +
": " + string(str);
// Records a (non-fatal) parse error: pushes the parser_error state and appends
// a formatted, line-numbered message to the parser's error_msg. The visible
// lines match dmr_fatal_error except for the state that is pushed.
//
// @param p   Untyped pointer; not used directly in the lines shown here.
// @param msg printf-style format string, followed by its arguments.
//
// NOTE(review): `parser`, `args` and `str` are not declared in the visible
// lines, and no va_end(args) is visible either -- confirm all of these against
// the full source.
1623 void DmrppParserSax2::dmr_error(
void *p,
const char *msg, ...)
1628 parser->push_state(parser_error);
1630 va_start(args, msg);
// The formatted text is limited to 1024 bytes including the terminator;
// anything longer is truncated by vsnprintf.
1632 vsnprintf(str, 1024, msg, args);
// Line number reported by libxml2 for this parser's context.
1635 int line = xmlSAX2GetLineNumber(parser->context);
// Messages accumulate: each new one starts on its own line after any earlier text.
1637 if (!parser->error_msg.empty()) parser->error_msg +=
"\n";
1638 parser->error_msg +=
"At line " + long_to_string(line) +
": " + string(str);
// Post-parse cleanup: frees the libxml2 parser context, deletes any BaseType
// objects still on the variable stack, and throws BESInternalError carrying
// error_msg when the parse failed.
//
// @throws BESInternalError see the throw statements below.
1645 void DmrppParserSax2::cleanup_parse()
// Both flags are copied out of the context before it is freed below.
1647 bool wellFormed = context->wellFormed;
1648 bool valid = context->valid;
1652 xmlFreeParserCtxt(context);
// Delete whatever is still on btp_stack.
// NOTE(review): the statement that removes the deleted entry from the stack
// (and so ends the loop) is not visible in this listing -- confirm.
1662 while (!btp_stack.empty()) {
1663 delete top_basetype();
// NOTE(review): the conditions guarding the next two throws are not visible
// here; presumably they test `wellFormed` and `valid` -- confirm.
1668 throw BESInternalError(
"The DMR was not well formed. " + error_msg,__FILE__,__LINE__);
// NOTE(review): unlike the message above, this literal has no trailing space,
// so error_msg is appended directly after the full stop.
1670 throw BESInternalError(
"The DMR was not valid." + error_msg,__FILE__,__LINE__);
// NOTE(review): the statement for the parser_error branch is not visible in
// this listing; the parser_fatal_error branch throws error_msg as-is.
1671 else if (get_state() == parser_error)
1673 else if (get_state() == parser_fatal_error)
throw BESInternalError(error_msg,__FILE__,__LINE__);
// Parses a DMR++ document from the stream `f` with a libxml2 push parser,
// feeding the input to the parser in chunks of up to D4_PARSE_BUFF_SIZE bytes.
//
// @param f        Input stream holding the document; must be in a good state.
// @param dest_dmr Destination DMR; must not be null.
// @throws BESInternalError if `f` is not good, if `dest_dmr` is null, or if
//         `line` is empty when the parser is about to be created.
1689 void DmrppParserSax2::intern(istream &f, DMR *dest_dmr)
1693 if (!f.good())
throw BESInternalError(prolog +
"ERROR - Supplied istream instance not open or read error",__FILE__,__LINE__);
// NOTE(review): the message below contains the typo "THe".
1694 if (!dest_dmr)
throw BESInternalError(prolog +
"THe supplied DMR object pointer is null", __FILE__, __LINE__);
// NOTE(review): `line` and `line_num` are neither declared nor filled in the
// visible lines; presumably the first line of `f` is read just above this
// check -- confirm against the full source.
1703 if (line.length() == 0)
throw BESInternalError(prolog +
"ERROR - No input found when parsing the DMR++",__FILE__,__LINE__);
1705 BESDEBUG(PARSER, prolog <<
"line: (" << line_num <<
"): " << endl << line << endl << endl);
// The push parser is primed with the content of `line`; "stream" is the
// filename argument of xmlCreatePushParserCtxt.
// NOTE(review): the returned context is dereferenced on the next statement
// without a null check.
1707 context = xmlCreatePushParserCtxt(&dmrpp_sax_parser,
this, line.c_str(), line.length(),
"stream");
1708 context->validate =
true;
1709 push_state(parser_start);
1712 long chunk_count = 0;
1713 long chunk_size = 0;
// Read the first chunk; gcount() gives the number of bytes actually read.
1715 f.read(d_parse_buffer, D4_PARSE_BUFF_SIZE);
1716 chunk_size=f.gcount();
// NUL-terminate so the buffer can be streamed as a C string in the debug
// output below.
// NOTE(review): when a read fills all D4_PARSE_BUFF_SIZE bytes this writes to
// index D4_PARSE_BUFF_SIZE, which is only in bounds if d_parse_buffer holds at
// least one extra byte -- confirm in the header.
1717 d_parse_buffer[chunk_size]=0;
1718 BESDEBUG(PARSER, prolog <<
"chunk: (" << chunk_count++ <<
"): " << endl);
1719 BESDEBUG(PARSER, prolog <<
"d_parse_buffer: (" << d_parse_buffer <<
"): " << endl);
// Feed the chunk already read (terminate flag 0), then read the next one,
// until the stream reports EOF or the parser reaches the parser_end state.
1721 while(!f.eof() && (get_state() != parser_end)){
1723 xmlParseChunk(context, d_parse_buffer, chunk_size, 0);
1726 f.read(d_parse_buffer, D4_PARSE_BUFF_SIZE);
1727 chunk_size=f.gcount();
1728 d_parse_buffer[chunk_size]=0;
1729 BESDEBUG(PARSER, prolog <<
"chunk: (" << chunk_count++ <<
"): " << endl);
1730 BESDEBUG(PARSER, prolog <<
"d_parse_buffer: (" << d_parse_buffer <<
"): " << endl);
// Hand over the last chunk read with the terminate flag set to 1, which tells
// libxml2 that the document is complete.
1734 xmlParseChunk(context, d_parse_buffer, chunk_size, 1);
// Convenience overload: parses the DMR++ document held in `document` by
// forwarding its character data and length to the (buffer, size) overload.
//
// @param document Complete document text.
// @param dest_dmr Destination DMR, passed through unchanged.
//
// NOTE(review): document.length() is converted to the `int size` parameter of
// the overload that is called.
1751 void DmrppParserSax2::intern(
const string &document, DMR *dest_dmr)
1753 intern(document.c_str(), document.length(), dest_dmr);
// Parses a DMR++ document held entirely in memory with a libxml2 push parser.
//
// @param buffer   Start of the document text.
// @param size     Number of bytes at `buffer`; when it is not positive the
//                 function returns without doing anything.
// @param dest_dmr Destination DMR; must not be null.
// @throws InternalErr if `dest_dmr` is null. NOTE(review): the stream overload
//         throws BESInternalError for the same condition.
1764 void DmrppParserSax2::intern(
const char *buffer,
int size, DMR *dest_dmr)
// Nothing to parse.
1766 if (!(size > 0))
return;
1770 if (!dest_dmr)
throw InternalErr(__FILE__, __LINE__,
"DMR object is null");
1773 push_state(parser_start);
// The whole buffer is handed to the parser when the context is created;
// "stream" is the filename argument of xmlCreatePushParserCtxt.
// NOTE(review): the returned context is dereferenced on the next statement
// without a null check.
1774 context = xmlCreatePushParserCtxt(&dmrpp_sax_parser,
this, buffer, size,
"stream");
1775 context->validate =
true;
// Zero additional bytes with the terminate flag set to 1: this only signals
// end of document.
1778 xmlParseChunk(context, buffer, 0, 1);
static BESCatalogList * TheCatalogList()
Get the singleton BESCatalogList instance.
virtual BESCatalog * default_catalog() const
Get the default catalog.
const std::string & get_root_dir() const
Get the root directory of the catalog.
Catalogs provide a hierarchical organization for data.
virtual BESCatalogUtils * get_catalog_utils() const
Get a pointer to the utilities, customized for this catalog.
static std::ostream * GetStrm()
return the debug stream
static bool IsSet(const std::string &flagName)
see if the debug context flagName is set to true
exception thrown if internal error encountered
static std::string assemblePath(const std::string &firstPart, const std::string &secondPart, bool leadingSlash=false, bool trailingSlash=false)
Assemble path fragments making sure that they are separated by a single '/' character.
utility class for the HTTP catalog module