00001
00002
00003
00004
00005
00006
00012
00013
00014
00015
00016
00017
00018
00019
00020 #include "../config.h"
00021 #include <iostream>
00022 #include <fstream>
00023 #include <stdlib.h>
00024 #include <stdio.h>
00025 #include <string>
00026 #include "ParserEventGeneratorKit.h"
00027 #include "libofx.h"
00028 #include "messages.hh"
00029 #include "ofx_sgml.hh"
00030 #include "ofc_sgml.hh"
00031 #include "ofx_preproc.hh"
00032 #include "ofx_utilities.hh"
00033 #ifdef HAVE_ICONV
00034 #include <iconv.h>
00035 #endif
00036
00037 #ifdef OS_WIN32
00038 # include "win32.hh"
00039 #endif
00040
00041 #define LIBOFX_DEFAULT_INPUT_ENCODING "CP1252"
00042 #define LIBOFX_DEFAULT_OUTPUT_ENCODING "UTF-8"
00043
00044 using namespace std;
/**
 * Number of entries in DTD_SEARCH_PATH.  One extra slot is reserved when the
 * build defines MAKEFILE_DTD_PATH.
 */
00048 #ifdef MAKEFILE_DTD_PATH
00049 const int DTD_SEARCH_PATH_NUM = 4;
00050 #else
00051 const int DTD_SEARCH_PATH_NUM = 3;
00052 #endif
00053
/**
 * Directories that find_dtd() tries, in order, after the context's own DTD
 * directory.  Each entry ends with '/' because the DTD file name is appended
 * directly to it.  Note that "~/" is a literal string here: any expansion to
 * the user's home directory has to be done by the code that opens the path.
 */
00057 const char *DTD_SEARCH_PATH[DTD_SEARCH_PATH_NUM] = {
00058 #ifdef MAKEFILE_DTD_PATH
00059 MAKEFILE_DTD_PATH ,
00060 #endif
00061 "/usr/local/share/libofx/dtd/",
00062 "/usr/share/libofx/dtd/",
00063 "~/"};
/** Size, in bytes, of the fixed read buffers used in this file (e.g. the line buffer in ofx_proc_file()). */
00064 const unsigned int READ_BUFFER_SIZE = 1024;
00065
00070 CFCT int ofx_proc_file(LibofxContextPtr ctx, const char * p_filename)
00071 {
00072 LibofxContext *libofx_context;
00073 bool ofx_start=false;
00074 bool ofx_end=false;
00075
00076 ifstream input_file;
00077 ofstream tmp_file;
00078 char buffer[READ_BUFFER_SIZE];
00079 char iconv_buffer[READ_BUFFER_SIZE * 2];
00080 string s_buffer;
00081 char *filenames[3];
00082 char tmp_filename[256];
00083 #ifdef HAVE_ICONV
00084 iconv_t conversion_descriptor;
00085 #endif
00086 libofx_context=(LibofxContext*)ctx;
00087
00088 if(p_filename!=NULL&&strcmp(p_filename,"")!=0)
00089 {
00090 message_out(DEBUG, string("ofx_proc_file():Opening file: ")+ p_filename);
00091
00092 input_file.open(p_filename);
00093 mkTempFileName("libofxtmpXXXXXX", tmp_filename, sizeof(tmp_filename));
00094 mkstemp(tmp_filename);
00095 tmp_file.open(tmp_filename);
00096
00097 message_out(DEBUG,"ofx_proc_file(): Creating temp file: "+string(tmp_filename));
00098 if(!input_file){
00099 message_out(ERROR,"ofx_proc_file():Unable to open the input file "+string(p_filename));
00100 }
00101 else if(!tmp_file){
00102 message_out(ERROR,"ofx_proc_file():Unable to open the output file "+string(tmp_filename));
00103 }
00104 else
00105 {
00106 int header_separator_idx;
00107 string header_name;
00108 string header_value;
00109 string ofx_encoding;
00110 string ofx_charset;
00111 do {
00112 input_file.getline(buffer, sizeof(buffer),'\n');
00113
00114 s_buffer.assign(buffer);
00115
00116 if(input_file.gcount()<(sizeof(buffer)-1))
00117 {
00118 s_buffer.append("\n");
00119 }
00120 else if( !input_file.eof()&&input_file.fail())
00121 {
00122 input_file.clear();
00123 }
00124 int ofx_start_idx;
00125 if (ofx_start==false &&
00126 (
00127 (libofx_context->currentFileType()==OFX&&
00128 ((ofx_start_idx=s_buffer.find("<OFX>"))!=
00129 string::npos||(ofx_start_idx=s_buffer.find("<ofx>"))!=string::npos))
00130 || (libofx_context->currentFileType()==OFC&&
00131 ((ofx_start_idx=s_buffer.find("<OFC>"))!=string::npos||
00132 (ofx_start_idx=s_buffer.find("<ofc>"))!=string::npos))
00133 )
00134 )
00135 {
00136 ofx_start=true;
00137 s_buffer.erase(0,ofx_start_idx);
00138 message_out(DEBUG,"ofx_proc_file():<OFX> or <OFC> has been found");
00139 #ifdef HAVE_ICONV
00140 string fromcode;
00141 string tocode;
00142 if(ofx_encoding.compare("USASCII")==0){
00143 if(ofx_charset.compare("ISO-8859-1")==0){
00144 fromcode="ISO-8859-1";
00145 }
00146 else if(ofx_charset.compare("1252")==0){
00147 fromcode="CP1252";
00148 }
00149 else if(ofx_charset.compare("NONE")==0){
00150 fromcode=LIBOFX_DEFAULT_INPUT_ENCODING;
00151 }
00152 }
00153 else if(ofx_encoding.compare("USASCII")==0) {
00154 fromcode="UTF-8";
00155 }
00156 else
00157 {
00158 fromcode=LIBOFX_DEFAULT_INPUT_ENCODING;
00159 }
00160 tocode = LIBOFX_DEFAULT_OUTPUT_ENCODING;
00161 message_out(DEBUG,"ofx_proc_file(): Setting up iconv for fromcode: "+fromcode+", tocode: "+tocode);
00162 conversion_descriptor = iconv_open (tocode.c_str(), fromcode.c_str());
00163 #endif
00164 }
00165 else {
00166
00167 if ((header_separator_idx=s_buffer.find(':')) != string::npos) {
00168
00169 header_name.assign(s_buffer.substr(0,header_separator_idx));
00170 header_value.assign(s_buffer.substr(header_separator_idx+1));
00171 message_out(DEBUG,"ofx_proc_file():Header: "+header_name+" with value: "+header_value+" has been found");
00172 if(header_name.compare("ENCODING")==0) {
00173 ofx_encoding.assign(header_value);
00174 }
00175 if(header_name.compare("CHARSET")==0) {
00176 ofx_charset.assign(header_value);
00177 }
00178 }
00179 }
00180
00181 if(ofx_start==true && ofx_end==false){
00182 s_buffer=sanitize_proprietary_tags(s_buffer);
00183
00184 #ifdef HAVE_ICONV
00185 memset(iconv_buffer,0,READ_BUFFER_SIZE * 2);
00186 size_t inbytesleft = strlen(s_buffer.c_str());
00187 size_t outbytesleft = READ_BUFFER_SIZE * 2 - 1;
00188 #ifdef OS_WIN32
00189 const char * inchar = (const char *)s_buffer.c_str();
00190 #else
00191 char * inchar = (char *)s_buffer.c_str();
00192 #endif
00193 char * outchar = iconv_buffer;
00194 int iconv_retval = iconv (conversion_descriptor,
00195 &inchar, &inbytesleft,
00196 &outchar, &outbytesleft);
00197 if(iconv_retval==-1){
00198 message_out(ERROR,"ofx_proc_file(): Conversion error");
00199 }
00200 s_buffer = iconv_buffer;
00201 #endif
00202 tmp_file.write(s_buffer.c_str(), s_buffer.length());
00203 }
00204
00205 if (ofx_start==true &&
00206 (
00207 (libofx_context->currentFileType()==OFX &&
00208 ((ofx_start_idx=s_buffer.find("</OFX>"))!=string::npos ||
00209 (ofx_start_idx=s_buffer.find("</ofx>"))!=string::npos))
00210 || (libofx_context->currentFileType()==OFC &&
00211 ((ofx_start_idx=s_buffer.find("</OFC>"))!=string::npos ||
00212 (ofx_start_idx=s_buffer.find("</ofc>"))!=string::npos))
00213 )
00214 )
00215 {
00216 ofx_end=true;
00217 message_out(DEBUG,"ofx_proc_file():</OFX> or </OFC> has been found");
00218 }
00219
00220 } while(!input_file.eof()&&!input_file.bad());
00221 }
00222 input_file.close();
00223 tmp_file.close();
00224 #ifdef HAVE_ICONV
00225 iconv_close(conversion_descriptor);
00226 #endif
00227 char filename_openspdtd[255];
00228 char filename_dtd[255];
00229 char filename_ofx[255];
00230 strncpy(filename_openspdtd,find_dtd(ctx, OPENSPDCL_FILENAME).c_str(),255);
00231 if(libofx_context->currentFileType()==OFX)
00232 {
00233 strncpy(filename_dtd,find_dtd(ctx, OFX160DTD_FILENAME).c_str(),255);
00234 }
00235 else if(libofx_context->currentFileType()==OFC)
00236 {
00237 strncpy(filename_dtd,find_dtd(ctx, OFCDTD_FILENAME).c_str(),255);
00238 }
00239 else
00240 {
00241 message_out(ERROR,string("ofx_proc_file(): Error unknown file format for the OFX parser"));
00242 }
00243
00244 if((string)filename_dtd!="" && (string)filename_openspdtd!="")
00245 {
00246 strncpy(filename_ofx,tmp_filename,255);
00247 filenames[0]=filename_openspdtd;
00248 filenames[1]=filename_dtd;
00249 filenames[2]=filename_ofx;
00250 if(libofx_context->currentFileType()==OFX)
00251 {
00252 ofx_proc_sgml(libofx_context, 3,filenames);
00253 }
00254 else if(libofx_context->currentFileType()==OFC)
00255 {
00256 ofc_proc_sgml(libofx_context, 3,filenames);
00257 }
00258 else
00259 {
00260 message_out(ERROR,string("ofx_proc_file(): Error unknown file format for the OFX parser"));
00261 }
00262 if(remove(tmp_filename)!=0)
00263 {
00264 message_out(ERROR,"ofx_proc_file(): Error deleting temporary file "+string(tmp_filename));
00265 }
00266 }
00267 else
00268 {
00269 message_out(ERROR,"ofx_proc_file(): FATAL: Missing DTD, aborting");
00270 }
00271 }
00272 else{
00273 message_out(ERROR,"ofx_proc_file():No input file specified");
00274 }
00275 return 0;
00276 }
00277
00278
00279
00280 CFCT int libofx_proc_buffer(LibofxContextPtr ctx,
00281 const char *s, unsigned int size){
00282 ofstream tmp_file;
00283 string s_buffer;
00284 char *filenames[3];
00285 char tmp_filename[256];
00286 ssize_t pos;
00287 LibofxContext *libofx_context;
00288
00289 libofx_context=(LibofxContext*)ctx;
00290
00291 if (size==0) {
00292 message_out(ERROR,
00293 "ofx_proc_file(): bad size");
00294 return -1;
00295 }
00296 s_buffer=string(s, size);
00297
00298 mkTempFileName("libofxtmpXXXXXX", tmp_filename, sizeof(tmp_filename));
00299 mkstemp(tmp_filename);
00300 tmp_file.open(tmp_filename);
00301
00302 message_out(DEBUG,"ofx_proc_file(): Creating temp file: "+string(tmp_filename));
00303 if(!tmp_file){
00304 message_out(ERROR,"ofx_proc_file():Unable to open the output file "+string(tmp_filename));
00305 return -1;
00306 }
00307
00308 if (libofx_context->currentFileType()==OFX) {
00309 pos=s_buffer.find("<OFX>");
00310 if (pos==string::npos)
00311 pos=s_buffer.find("<ofx>");
00312 }
00313 else if (libofx_context->currentFileType()==OFC) {
00314 pos=s_buffer.find("<OFC>");
00315 if (pos==string::npos)
00316 pos=s_buffer.find("<ofc>");
00317 }
00318 else {
00319 message_out(ERROR,"ofx_proc(): unknown file type");
00320 return -1;
00321 }
00322 if (pos==string::npos || pos > s_buffer.size()) {
00323 message_out(ERROR,"ofx_proc():<OFX> has not been found");
00324 return -1;
00325 }
00326 else {
00327
00328 s_buffer.erase(0, pos);
00329 message_out(DEBUG,"ofx_proc_file():<OF?> has been found");
00330 }
00331
00332 if (libofx_context->currentFileType()==OFX) {
00333 pos=s_buffer.find("</OFX>");
00334 if (pos==string::npos)
00335 pos=s_buffer.find("</ofx>");
00336 }
00337 else if (libofx_context->currentFileType()==OFC) {
00338 pos=s_buffer.find("</OFC>");
00339 if (pos==string::npos)
00340 pos=s_buffer.find("</ofc>");
00341 }
00342 else {
00343 message_out(ERROR,"ofx_proc(): unknown file type");
00344 return -1;
00345 }
00346
00347 if (pos==string::npos || pos > s_buffer.size()) {
00348 message_out(ERROR,"ofx_proc():</OF?> has not been found");
00349 return -1;
00350 }
00351 else {
00352
00353 if (s_buffer.size() > pos+6)
00354 s_buffer.erase(pos+6);
00355 message_out(DEBUG,"ofx_proc_file():<OFX> has been found");
00356 }
00357
00358 s_buffer=sanitize_proprietary_tags(s_buffer);
00359 tmp_file.write(s_buffer.c_str(), s_buffer.length());
00360
00361 tmp_file.close();
00362
00363 char filename_openspdtd[255];
00364 char filename_dtd[255];
00365 char filename_ofx[255];
00366 strncpy(filename_openspdtd,find_dtd(ctx, OPENSPDCL_FILENAME).c_str(),255);
00367 if(libofx_context->currentFileType()==OFX){
00368 strncpy(filename_dtd,find_dtd(ctx, OFX160DTD_FILENAME).c_str(),255);
00369 }
00370 else if(libofx_context->currentFileType()==OFC){
00371 strncpy(filename_dtd,find_dtd(ctx, OFCDTD_FILENAME).c_str(),255);
00372 }
00373 else {
00374 message_out(ERROR,string("ofx_proc_file(): Error unknown file format for the OFX parser"));
00375 }
00376
00377 if((string)filename_dtd!="" && (string)filename_openspdtd!=""){
00378 strncpy(filename_ofx,tmp_filename,255);
00379 filenames[0]=filename_openspdtd;
00380 filenames[1]=filename_dtd;
00381 filenames[2]=filename_ofx;
00382 if(libofx_context->currentFileType()==OFX){
00383 ofx_proc_sgml(libofx_context, 3,filenames);
00384 }
00385 else if(libofx_context->currentFileType()==OFC){
00386 ofc_proc_sgml(libofx_context, 3,filenames);
00387 }
00388 else {
00389 message_out(ERROR,string("ofx_proc_file(): Error unknown file format for the OFX parser"));
00390 }
00391 if(remove(tmp_filename)!=0){
00392 message_out(ERROR,"ofx_proc_file(): Error deleting temporary file "+string(tmp_filename));
00393 }
00394 }
00395 else {
00396 message_out(ERROR,"ofx_proc_file(): FATAL: Missing DTD, aborting");
00397 }
00398
00399 return 0;
00400 }
00401
00402
00403
00404
00405
00406
00411 string sanitize_proprietary_tags(string input_string)
00412 {
00413 unsigned int i;
00414 size_t input_string_size;
00415 bool strip=false;
00416 bool tag_open=false;
00417 int tag_open_idx=0;
00418 bool closing_tag_open=false;
00419 int orig_tag_open_idx=0;
00420 bool proprietary_tag=false;
00421 bool proprietary_closing_tag=false;
00422 int crop_end_idx=0;
00423 char buffer[READ_BUFFER_SIZE]="";
00424 char tagname[READ_BUFFER_SIZE]="";
00425 int tagname_idx=0;
00426 char close_tagname[READ_BUFFER_SIZE]="";
00427
00428 for(i=0;i<READ_BUFFER_SIZE;i++){
00429 buffer[i]=0;
00430 tagname[i]=0;
00431 close_tagname[i]=0;
00432 }
00433
00434 input_string_size=input_string.size();
00435
00436 for(i=0;i<=input_string_size;i++){
00437 if(input_string.c_str()[i]=='<'){
00438 tag_open=true;
00439 tag_open_idx=i;
00440 if(proprietary_tag==true&&input_string.c_str()[i+1]=='/'){
00441
00442 closing_tag_open=true;
00443
00444 if(strncmp(tagname,&(input_string.c_str()[i+2]),strlen(tagname))!=0){
00445
00446
00447 crop_end_idx=i-1;
00448 strip=true;
00449 }
00450 else{
00451
00452 proprietary_closing_tag=true;
00453 }
00454 }
00455 else if(proprietary_tag==true){
00456
00457 crop_end_idx=i-1;
00458 strip=true;
00459 }
00460 }
00461 else if(input_string.c_str()[i]=='>'){
00462 tag_open=false;
00463 closing_tag_open=false;
00464 tagname[tagname_idx]=0;
00465 tagname_idx=0;
00466 if(proprietary_closing_tag==true){
00467 crop_end_idx=i;
00468 strip=true;
00469 }
00470 }
00471 else if(tag_open==true&&closing_tag_open==false){
00472 if(input_string.c_str()[i]=='.'){
00473 if(proprietary_tag!=true){
00474 orig_tag_open_idx = tag_open_idx;
00475 proprietary_tag=true;
00476 }
00477 }
00478 tagname[tagname_idx]=input_string.c_str()[i];
00479 tagname_idx++;
00480 }
00481
00482 if(strip==true && orig_tag_open_idx < input_string.size())
00483 {
00484 input_string.copy(buffer,(crop_end_idx-orig_tag_open_idx)+1,orig_tag_open_idx);
00485 message_out(INFO,"sanitize_proprietary_tags() (end tag or new tag) removed: "+string(buffer));
00486 input_string.erase(orig_tag_open_idx,(crop_end_idx-orig_tag_open_idx)+1);
00487 i=orig_tag_open_idx-1;
00488 proprietary_tag=false;
00489 proprietary_closing_tag=false;
00490 closing_tag_open=false;
00491 tag_open=false;
00492 strip=false;
00493 }
00494
00495 }
00496 if(proprietary_tag==true && orig_tag_open_idx < input_string.size()){
00497 if(crop_end_idx==0){
00498 crop_end_idx=input_string.size()-1;
00499 }
00500 input_string.copy(buffer,(crop_end_idx-orig_tag_open_idx)+1,orig_tag_open_idx);
00501 message_out(INFO,"sanitize_proprietary_tags() (end of line) removed: "+string(buffer));
00502 input_string.erase(orig_tag_open_idx,(crop_end_idx-orig_tag_open_idx)+1);
00503 }
00504 return input_string;
00505 }
00506
00507
00508
00514 string find_dtd(LibofxContextPtr ctx, string dtd_filename)
00515 {
00516 int i;
00517 ifstream dtd_file;
00518 string dtd_path_filename;
00519 bool dtd_found=false;
00520
00521 dtd_path_filename=((LibofxContext*)ctx)->dtdDir();
00522 if (!dtd_path_filename.empty()) {
00523 dtd_path_filename.append(dtd_filename);
00524 dtd_file.clear();
00525 dtd_file.open(dtd_path_filename.c_str());
00526 if(dtd_file){
00527 message_out(STATUS,"find_dtd():DTD found: "+dtd_path_filename);
00528 dtd_file.close();
00529 dtd_found=true;
00530 }
00531 }
00532
00533 if (!dtd_found) {
00534 for(i=0;i<DTD_SEARCH_PATH_NUM&&dtd_found==false;i++){
00535 dtd_path_filename=DTD_SEARCH_PATH[i];
00536 dtd_path_filename.append(dtd_filename);
00537 dtd_file.clear();
00538 dtd_file.open(dtd_path_filename.c_str());
00539 if(!dtd_file){
00540 message_out(DEBUG,"find_dtd():Unable to open the file "+dtd_path_filename);
00541 }
00542 else{
00543 message_out(STATUS,"find_dtd():DTD found: "+dtd_path_filename);
00544 dtd_file.close();
00545 dtd_found=true;
00546 }
00547 }
00548 }
00549
00550 if(dtd_found==false){
00551 message_out(ERROR,"find_dtd():Unable to find the DTD named " + dtd_filename);
00552 dtd_path_filename="";
00553 }
00554 return dtd_path_filename;
00555 }
00556
00557