#include <WPS4.h>
Public Member Functions | |
WPS4Parser (WPSInputStream *input, WPSHeader *header) | |
This class parses Works version 2 through 4. | |
~WPS4Parser () | |
void | parse (WPXHLListenerImpl *listenerImpl) |
Private Member Functions | |
void | parsePages (std::list< WPSPageSpan > &pageList, WPSInputStream *input) |
Read the page format from the file. | |
void | parse (WPSInputStream *stream, WPS4ContentListener *listener) |
void | readFontsTable (WPSInputStream *input) |
Reads fonts table into memory. | |
bool | readFODPage (WPSInputStream *input, std::vector< FOD > *FODs) |
Read a single "page" (128 bytes) that contains formatting descriptors (FODs) for either characters OR paragraphs. | |
void | propertyChangeTextAttribute (const uint32_t newTextAttributeBits, const uint8_t attribute, const uint32_t bit, WPS4ContentListener *listener) |
void | propertyChangeDelta (uint32_t newTextAttributeBits, WPS4ContentListener *listener) |
void | propertyChange (std::string rgchProp, WPS4ContentListener *listener) |
Process a character property change. | |
void | appendCP850 (const uint8_t readVal, WPS4ContentListener *listener) |
Take a character in CP850 encoding, convert it and append it to the text buffer as UTF8. | |
void | appendCP1252 (const uint8_t readVal, WPS4ContentListener *listener) |
Take a character in CP1252 encoding, convert it and append it to the text buffer as UTF8. | |
void | readText (WPSInputStream *input, WPS4ContentListener *listener) |
Read the text of the document using previously-read formatting information. | |
Private Attributes | |
uint32_t | oldTextAttributeBits |
uint32_t | offset_eot |
uint32_t | offset_eos |
std::vector< FOD > | CHFODs |
std::vector< FOD > | PAFODs |
std::map< uint8_t, std::string > | fonts |
const uint8_t | m_worksVersion |
WPS4Parser::WPS4Parser | ( | WPSInputStream * | input, | |
WPSHeader * | header | |||
) |
This class parses Works version 2 through 4.
WPS4Parser::~WPS4Parser | ( | ) |
void WPS4Parser::appendCP1252 | ( | const uint8_t | readVal, | |
WPS4ContentListener * | listener | |||
) | [private] |
Take a character in CP1252 encoding, convert it and append it to the text buffer as UTF8.
Courtesy of glib2 and iconv
Referenced by readText().
void WPS4Parser::appendCP850 | ( | const uint8_t | readVal, | |
WPS4ContentListener * | listener | |||
) | [private] |
Take a character in CP850 encoding, convert it and append it to the text buffer as UTF8.
Courtesy of glib2 and iconv
Referenced by readText().
void WPS4Parser::parse | ( | WPSInputStream * | stream, | |
WPS4ContentListener * | listener | |||
) | [private] |
void WPS4Parser::parse | ( | WPXHLListenerImpl * | listenerImpl | ) | [virtual] |
Implements WPSParser.
Referenced by WPSDocument::parse().
void WPS4Parser::parsePages | ( | std::list< WPSPageSpan > & | pageList, | |
WPSInputStream * | input | |||
) | [private] |
Read the page format from the file.
It seems that WPS4 files can only have one page format throughout the whole document.
Referenced by parse().
void WPS4Parser::propertyChange | ( | std::string | rgchProp, | |
WPS4ContentListener * | listener | |||
) | [private] |
Process a character property change.
The Works format supplies all the character formatting each time there is any change (as opposed to HTML, for example). In Works 4, the position in rgchProp is significant (e.g., bold is always in the first byte).
Referenced by readText().
void WPS4Parser::propertyChangeDelta | ( | uint32_t | newTextAttributeBits, | |
WPS4ContentListener * | listener | |||
) | [private] |
newTextAttributeBits: | all the new, current bits (these will be compared against the old bits, and the old bits will then be discarded). |
Referenced by propertyChange().
void WPS4Parser::propertyChangeTextAttribute | ( | const uint32_t | newTextAttributeBits, | |
const uint8_t | attribute, | |||
const uint32_t | bit, | |||
WPS4ContentListener * | listener | |||
) | [private] |
Referenced by propertyChangeDelta().
bool WPS4Parser::readFODPage | ( | WPSInputStream * | input, | |
std::vector< FOD > * | FODs | |||
) | [private] |
Read a single "page" (128 bytes) that contains formatting descriptors (FODs) for either characters OR paragraphs.
Starts reading at current position in stream.
Returns: true if more pages of this type exist; otherwise, false.
Referenced by parse().
void WPS4Parser::readFontsTable | ( | WPSInputStream * | input | ) | [private] |
Reads fonts table into memory.
Referenced by parse().
void WPS4Parser::readText | ( | WPSInputStream * | input, | |
WPS4ContentListener * | listener | |||
) | [private] |
Read the text of the document using previously-read formatting information.
Referenced by parse().
std::vector<FOD> WPS4Parser::CHFODs [private] |
Referenced by parse(), and readText().
std::map<uint8_t, std::string> WPS4Parser::fonts [private] |
Referenced by propertyChange(), and readFontsTable().
const uint8_t WPS4Parser::m_worksVersion [private] |
Referenced by readText().
uint32_t WPS4Parser::offset_eos [private] |
uint32_t WPS4Parser::offset_eot [private] |
Referenced by parse(), and readFODPage().
uint32_t WPS4Parser::oldTextAttributeBits [private] |
Referenced by propertyChangeDelta(), propertyChangeTextAttribute(), and readText().
std::vector<FOD> WPS4Parser::PAFODs [private] |
Referenced by parse().