OpenTREP Logo  0.07.9
C++ Open Travel Request Parsing Library
opentrep-searcher.cpp
Go to the documentation of this file.
1 // STL
2 #include <cassert>
3 #include <iostream>
4 #include <sstream>
5 #include <fstream>
6 #include <vector>
7 #include <string>
8 // Boost (Extended STL)
9 #include <boost/date_time/posix_time/posix_time.hpp>
10 #include <boost/date_time/gregorian/gregorian.hpp>
11 #include <boost/tokenizer.hpp>
12 #include <boost/program_options.hpp>
13 // OpenTREP
15 #include <opentrep/DBType.hpp>
18 #include <opentrep/Location.hpp>
19 #include <opentrep/CityDetails.hpp>
20 #include <opentrep/config/opentrep-paths.hpp>
21 
22 
23 // //////// Type definitions ///////
/** Sequence of words, e.g. the individual terms of a travel query. */
typedef std::vector<std::string> WordList_T;


// //////// Constants //////
/** Default value of the log file-path option (--log). */
const std::string K_OPENTREP_DEFAULT_LOG_FILENAME = "opentrep-searcher.log";

/** Travel query substituted by readConfiguration() when the caller
    provides an empty query string. */
const std::string K_OPENTREP_DEFAULT_QUERY_STRING =
  "sna francisco rio de janero los angeles reykyavki";

/** Default value of the search type option (--type); the command-line
    help describes 0 as full text and 1 as coordinates. */
const unsigned short K_OPENTREP_DEFAULT_SEARCH_TYPE (0);
47 
52 
53 
54 // //////////////////////////////////////////////////////////////////////
55 void tokeniseStringIntoWordList (const std::string& iPhrase,
56  WordList_T& ioWordList) {
57  // Empty the word list
58  ioWordList.clear();
59 
60  // Boost Tokeniser
61  typedef boost::tokenizer<boost::char_separator<char> > Tokeniser_T;
62 
63  // Define the separators
64  const boost::char_separator<char> lSepatorList(" .,;:|+-*/_=!@#$%`~^&(){}[]?'<>\"");
65 
66  // Initialise the phrase to be tokenised
67  Tokeniser_T lTokens (iPhrase, lSepatorList);
68  for (Tokeniser_T::const_iterator tok_iter = lTokens.begin();
69  tok_iter != lTokens.end(); ++tok_iter) {
70  const std::string& lTerm = *tok_iter;
71  ioWordList.push_back (lTerm);
72  }
73 }
74 
75 // //////////////////////////////////////////////////////////////////////
76 std::string createStringFromWordList (const WordList_T& iWordList) {
77  std::ostringstream oStr;
78 
79  unsigned short idx = iWordList.size();
80  for (WordList_T::const_iterator itWord = iWordList.begin();
81  itWord != iWordList.end(); ++itWord, --idx) {
82  const std::string& lWord = *itWord;
83  oStr << lWord;
84  if (idx > 1) {
85  oStr << " ";
86  }
87  }
88 
89  return oStr.str();
90 }
91 
92 
93 // ///////// Parsing of Options & Configuration /////////
94 // A helper function to simplify the main part.
/**
 * Stream every element of a vector, each one followed by a single space
 * (including the last one).
 *
 * @param os  Output stream.
 * @param v   Vector whose elements are to be written.
 * @return The output stream, so that calls can be chained.
 */
template<class T> std::ostream& operator<< (std::ostream& os,
                                            const std::vector<T>& v) {
  typedef typename std::vector<T>::const_iterator ConstIterator_T;

  for (ConstIterator_T itElem = v.begin(); itElem != v.end(); ++itElem) {
    os << *itElem << " ";
  }

  return os;
}
100 
103 
105 int readConfiguration (int argc, char* argv[],
106  unsigned short& ioSpellingErrorDistance,
107  std::string& ioQueryString,
108  std::string& ioXapianDBFilepath,
109  std::string& ioSQLDBTypeString,
110  std::string& ioSQLDBConnectionString,
111  unsigned short& ioDeploymentNumber,
112  std::string& ioLogFilename,
113  unsigned short& ioSearchType,
114  std::ostringstream& oStr) {
115 
116  // Initialise the travel query string, if that one is empty
117  if (ioQueryString.empty() == true) {
118  ioQueryString = K_OPENTREP_DEFAULT_QUERY_STRING;
119  }
120 
121  // Transform the query string into a list of words (STL strings)
122  WordList_T lWordList;
123  tokeniseStringIntoWordList (ioQueryString, lWordList);
124 
125  // Declare a group of options that will be allowed only on command line
126  boost::program_options::options_description generic ("Generic options");
127  generic.add_options()
128  ("prefix", "print installation prefix")
129  ("version,v", "print version string")
130  ("help,h", "produce help message");
131 
132  // Declare a group of options that will be allowed both on command
133  // line and in config file
134  boost::program_options::options_description config ("Configuration");
135  config.add_options()
136  ("error,e",
137  boost::program_options::value< unsigned short >(&ioSpellingErrorDistance)->default_value(K_OPENTREP_DEFAULT_SPELLING_ERROR_DISTANCE),
138  "Spelling error distance (e.g., 3)")
139  ("xapiandb,d",
140  boost::program_options::value< std::string >(&ioXapianDBFilepath)->default_value(OPENTREP::DEFAULT_OPENTREP_XAPIAN_DB_FILEPATH),
141  "Xapian database filepath (e.g., /tmp/opentrep/xapian_traveldb)")
142  ("sqldbtype,t",
143  boost::program_options::value< std::string >(&ioSQLDBTypeString)->default_value(OPENTREP::DEFAULT_OPENTREP_SQL_DB_TYPE),
144  "SQL database type (e.g., nodb for no SQL database, sqlite for SQLite, mysql for MariaDB/MySQL)")
145  ("sqldbconx,s",
146  boost::program_options::value< std::string >(&ioSQLDBConnectionString),
147  "SQL database connection string (e.g., ~/tmp/opentrep/sqlite_travel.db for SQLite, "
148  "\"db=trep_trep user=trep password=trep\" for MariaDB/MySQL)")
149  ("deploymentnb,m",
150  boost::program_options::value<unsigned short>(&ioDeploymentNumber)->default_value(OPENTREP::DEFAULT_OPENTREP_DEPLOYMENT_NUMBER),
151  "Deployment number (from to N, where N=1 normally)")
152  ("log,l",
153  boost::program_options::value< std::string >(&ioLogFilename)->default_value(K_OPENTREP_DEFAULT_LOG_FILENAME),
154  "Filepath for the logs")
155  ("type,y",
156  boost::program_options::value<unsigned short>(&ioSearchType)->default_value(K_OPENTREP_DEFAULT_SEARCH_TYPE),
157  "Type of search request (0 = full text, 1 = coordinates)")
158  ("query,q",
159  boost::program_options::value< WordList_T >(&lWordList)->multitoken(),
160  "Travel query word list (e.g. sna francisco rio de janero los angeles reykyavki), "
161  "which sould be located at the end of the command line (otherwise, "
162  "the other options would be interpreted as part of that travel query word list)")
163  ;
164 
165  // Hidden options, will be allowed both on command line and
166  // in config file, but will not be shown to the user.
167  boost::program_options::options_description hidden ("Hidden options");
168  hidden.add_options()
169  ("copyright",
170  boost::program_options::value< std::vector<std::string> >(),
171  "Show the copyright (license)");
172 
173  boost::program_options::options_description cmdline_options;
174  cmdline_options.add(generic).add(config).add(hidden);
175 
176  boost::program_options::options_description config_file_options;
177  config_file_options.add(config).add(hidden);
178 
179  boost::program_options::options_description visible ("Allowed options");
180  visible.add(generic).add(config);
181 
182  boost::program_options::positional_options_description p;
183  p.add ("copyright", -1);
184 
185  boost::program_options::variables_map vm;
186  boost::program_options::
187  store (boost::program_options::command_line_parser (argc, argv).
188  options (cmdline_options).positional(p).run(), vm);
189 
190  std::ifstream ifs ("opentrep-searcher.cfg");
191  boost::program_options::store (parse_config_file (ifs, config_file_options),
192  vm);
193  boost::program_options::notify (vm);
194 
195  if (vm.count ("help")) {
196  std::cout << visible << std::endl;
198  }
199 
200  if (vm.count ("version")) {
201  std::cout << PACKAGE_NAME << ", version " << PACKAGE_VERSION << std::endl;
203  }
204 
205  if (vm.count ("prefix")) {
206  std::cout << "Installation prefix: " << PREFIXDIR << std::endl;
208  }
209 
210  if (vm.count ("deploymentnb")) {
211  ioDeploymentNumber = vm["deploymentnb"].as< unsigned short >();
212  oStr << "Deployment number: " << ioDeploymentNumber << std::endl;
213  }
214 
215  if (vm.count ("xapiandb")) {
216  ioXapianDBFilepath = vm["xapiandb"].as< std::string >();
217  oStr << "Xapian database filepath is: " << ioXapianDBFilepath
218  << ioDeploymentNumber << std::endl;
219  }
220 
221  if (vm.count ("sqldbtype")) {
222  ioSQLDBTypeString = vm["sqldbtype"].as< std::string >();
223  oStr << "SQL database type is: " << ioSQLDBTypeString << std::endl;
224  }
225 
226  // Derive the detault connection string depending on the SQL database type
227  const OPENTREP::DBType lDBType (ioSQLDBTypeString);
228  if (lDBType == OPENTREP::DBType::NODB) {
229  ioSQLDBConnectionString = "";
230 
231  } else if (lDBType == OPENTREP::DBType::SQLITE3) {
232  ioSQLDBConnectionString = OPENTREP::DEFAULT_OPENTREP_SQLITE_DB_FILEPATH;
233 
234  } else if (lDBType == OPENTREP::DBType::MYSQL) {
235  ioSQLDBConnectionString = OPENTREP::DEFAULT_OPENTREP_MYSQL_CONN_STRING;
236  }
237 
238  // Set the SQL database connection string, if any is given
239  if (vm.count ("sqldbconx")) {
240  ioSQLDBConnectionString = vm["sqldbconx"].as< std::string >();
241  }
242 
243  // Reporting of the SQL database connection string
244  if (lDBType == OPENTREP::DBType::SQLITE3
245  || lDBType == OPENTREP::DBType::MYSQL) {
246  const std::string& lSQLDBConnString =
248  ioSQLDBConnectionString,
249  ioDeploymentNumber);
250  //
251  oStr << "SQL database connection string is: " << lSQLDBConnString
252  << std::endl;
253  }
254 
255  if (vm.count ("log")) {
256  ioLogFilename = vm["log"].as< std::string >();
257  oStr << "Log filename is: " << ioLogFilename << std::endl;
258  }
259 
260  oStr << "The type of search is: " << ioSearchType << std::endl;
261 
262  oStr << "The spelling error distance is: " << ioSpellingErrorDistance
263  << std::endl;
264 
265  ioQueryString = createStringFromWordList (lWordList);
266  oStr << "The travel query string is: " << ioQueryString << std::endl;
267 
268  return 0;
269 }
270 
274 std::string parseQuery (OPENTREP::OPENTREP_Service& ioOpentrepService,
275  const OPENTREP::TravelQuery_T& iTravelQuery) {
276  std::ostringstream oStr;
277 
278  // Query the Xapian database (index)
279  OPENTREP::WordList_T lNonMatchedWordList;
280  OPENTREP::LocationList_T lLocationList;
281  const OPENTREP::NbOfMatches_T nbOfMatches =
282  ioOpentrepService.interpretTravelRequest (iTravelQuery, lLocationList,
283  lNonMatchedWordList);
284 
285  oStr << nbOfMatches << " (geographical) location(s) have been found "
286  << "matching your query (`" << iTravelQuery << "'). "
287  << lNonMatchedWordList.size() << " word(s) was/were left unmatched."
288  << std::endl;
289 
290  if (nbOfMatches != 0) {
291  OPENTREP::NbOfMatches_T idx = 1;
292  for (OPENTREP::LocationList_T::const_iterator itLocation =
293  lLocationList.begin();
294  itLocation != lLocationList.end(); ++itLocation, ++idx) {
295  const OPENTREP::Location& lLocation = *itLocation;
296  oStr << " [" << idx << "]: " << lLocation << std::endl;
297  }
298  }
299 
300  if (lNonMatchedWordList.empty() == false) {
301  oStr << "List of unmatched words:" << std::endl;
302 
303  OPENTREP::NbOfMatches_T idx = 1;
304  for (OPENTREP::WordList_T::const_iterator itWord =
305  lNonMatchedWordList.begin();
306  itWord != lNonMatchedWordList.end(); ++itWord, ++idx) {
307  const OPENTREP::Word_T& lWord = *itWord;
308  oStr << " [" << idx << "]: " << lWord << std::endl;
309  }
310  }
311 
312  return oStr.str();
313 }
314 
315 // /////////////// M A I N /////////////////
316 int main (int argc, char* argv[]) {
317 
318  // Travel query
319  OPENTREP::TravelQuery_T lTravelQuery;
320 
321  // Output log File
322  std::string lLogFilename;
323 
324  // Xapian database name (directory of the index)
325  std::string lXapianDBNameStr;
326 
327  // Type of search
328  unsigned short lSearchType;
329 
330  // Xapian spelling error distance
331  unsigned short lSpellingErrorDistance;
332 
333  // SQL database type
334  std::string lSQLDBTypeStr;
335 
336  // SQL database connection string
337  std::string lSQLDBConnectionStr;
338 
339  // Deployment number/version
340  OPENTREP::DeploymentNumber_T lDeploymentNumber;
341 
342  // Log stream for the introduction part
343  std::ostringstream oIntroStr;
344 
345  // Call the command-line option parser
346  const int lOptionParserStatus =
347  readConfiguration (argc, argv, lSpellingErrorDistance, lTravelQuery,
348  lXapianDBNameStr, lSQLDBTypeStr, lSQLDBConnectionStr,
349  lDeploymentNumber, lLogFilename, lSearchType, oIntroStr);
350 
351  if (lOptionParserStatus == K_OPENTREP_EARLY_RETURN_STATUS) {
352  return 0;
353  }
354 
355  // Set the log parameters
356  std::ofstream logOutputFile;
357  // open and clean the log outputfile
358  logOutputFile.open (lLogFilename.c_str());
359  logOutputFile.clear();
360 
361  // Report the parameters
362  std::cout << oIntroStr.str();
363 
364  // DEBUG
365  // Get the current time in UTC Timezone
366  boost::posix_time::ptime lTimeUTC =
367  boost::posix_time::second_clock::universal_time();
368  logOutputFile << "[" << lTimeUTC << "][" << __FILE__ << "#"
369  << __LINE__ << "]:Parameters:" << std::endl
370  << oIntroStr.str() << std::endl;
371 
372  //
373  std::ostringstream oStr;
374  if (lSearchType == 0) {
375  // Initialise the context
376  const OPENTREP::TravelDBFilePath_T lXapianDBName (lXapianDBNameStr);
377  const OPENTREP::DBType lDBType (lSQLDBTypeStr);
378  const OPENTREP::SQLDBConnectionString_T lSQLDBConnStr (lSQLDBConnectionStr);
379  OPENTREP::OPENTREP_Service opentrepService (logOutputFile, lXapianDBName,
380  lDBType, lSQLDBConnStr,
381  lDeploymentNumber);
382 
383  // Check the directory of the Xapian database/index exists and is accessible
385  opentrepService.getFilePaths();
386  const OPENTREP::TravelDBFilePath_T& lActualXapianDBDir= lFPSet.second.first;
387  const bool lExistXapianDBDir =
388  opentrepService.checkXapianDBOnFileSystem (lActualXapianDBDir);
389  if (lExistXapianDBDir == false) {
390  std::ostringstream errorStr;
391  errorStr << "Error - The file-path to the Xapian database/index ('"
392  << lActualXapianDBDir
393  << "') does not exist or is not a directory." << std::endl;
394  errorStr << "\tThat usually means that the OpenTREP indexer "
395  << "(opentrep-indexer) has not been launched yet, "
396  << "or that it has operated on a different Xapian "
397  << "database/index file-path." << std::endl;
398  errorStr << "\tFor instance the Xapian database/index may have been "
399  << "created with a different deployment number ("
400  << lDeploymentNumber << " being the current deployment number)";
401  std::cerr << errorStr.str() << std::endl;
402  return -1;
403  }
404 
405  // Parse the query and retrieve the places from Xapian only
406  const std::string& lOutput = parseQuery (opentrepService, lTravelQuery);
407  oStr << lOutput;
408 
409  } else {
410  oStr << "Finding the airports closest to: " << lTravelQuery << std::endl;
411  }
412 
413  //
414  std::cout << oStr.str();
415 
416  // Get the current time in UTC Timezone
417  lTimeUTC = boost::posix_time::second_clock::universal_time();
418  logOutputFile << "[" << lTimeUTC << "][" << __FILE__ << "#"
419  << __LINE__ << "]:Results:" << std::endl
420  << oStr.str() << std::endl;
421 
422  // Close the Log outputFile
423  logOutputFile.close();
424 
425  return 0;
426 }
Interface for the OPENTREP Services.
bool checkXapianDBOnFileSystem(const TravelDBFilePath_T &) const
std::pair< const PORFilePath_T, const DBFilePathPair_T > FilePathSet_T
FilePathSet_T getFilePaths() const
NbOfMatches_T interpretTravelRequest(const std::string &iTravelQuery, LocationList_T &, WordList_T &)
const std::string DEFAULT_OPENTREP_SQLITE_DB_FILEPATH
std::list< Word_T > WordList_T
std::string Word_T
std::string TravelQuery_T
const std::string DEFAULT_OPENTREP_SQL_DB_TYPE
std::string parseAndDisplayConnectionString(const DBType &iDBType, const std::string &iSQLDBConnStr, const DeploymentNumber_T &iDeploymentNumber)
Definition: Utilities.cpp:273
std::list< Location > LocationList_T
const unsigned short DEFAULT_OPENTREP_DEPLOYMENT_NUMBER
const std::string DEFAULT_OPENTREP_MYSQL_CONN_STRING
unsigned short DeploymentNumber_T
const std::string DEFAULT_OPENTREP_XAPIAN_DB_FILEPATH
unsigned short NbOfMatches_T
std::vector< std::string > WordList_T
const int K_OPENTREP_EARLY_RETURN_STATUS
std::string createStringFromWordList(const WordList_T &iWordList)
int main(int argc, char *argv[])
const unsigned short K_OPENTREP_DEFAULT_SEARCH_TYPE
int readConfiguration(int argc, char *argv[], unsigned short &ioSpellingErrorDistance, std::string &ioQueryString, std::string &ioXapianDBFilepath, std::string &ioSQLDBTypeString, std::string &ioSQLDBConnectionString, unsigned short &ioDeploymentNumber, std::string &ioLogFilename, unsigned short &ioSearchType, std::ostringstream &oStr)
std::ostream & operator<<(std::ostream &os, const std::vector< T > &v)
const std::string K_OPENTREP_DEFAULT_LOG_FILENAME("opentrep-searcher.log")
void tokeniseStringIntoWordList(const std::string &iPhrase, WordList_T &ioWordList)
std::vector< std::string > WordList_T
std::string parseQuery(OPENTREP::OPENTREP_Service &ioOpentrepService, const OPENTREP::TravelQuery_T &iTravelQuery)
const unsigned short K_OPENTREP_DEFAULT_SPELLING_ERROR_DISTANCE
const std::string K_OPENTREP_DEFAULT_QUERY_STRING("sna francisco rio de janero los angeles reykyavki")
Enumeration of database types.
Definition: DBType.hpp:17
Structure modelling a (geographical) location.
Definition: Location.hpp:25