/* * Unicode manipulation functions * * by WangLu * 2012.11.29 */ #include #include #include "unicode.h" namespace pdf2htmlEX { using std::cerr; using std::endl; using std::ostream; bool isLegalUnicode(Unicode u) { /* if((u == 9) || (u == 10) || (u == 13)) return true; */ if(u <= 31) return false; if((u >= 127) && (u <= 159)) return false; if((u >= 0xd800) && (u <= 0xdfff)) return false; return true; } Unicode map_to_private(CharCode code) { Unicode private_mapping = (Unicode)(code + 0xE000); if(private_mapping > 0xF8FF) { private_mapping = (Unicode)((private_mapping - 0xF8FF) + 0xF0000); if(private_mapping > 0xFFFFD) { private_mapping = (Unicode)((private_mapping - 0xFFFFD) + 0x100000); if(private_mapping > 0x10FFFD) { cerr << "Warning: all private use unicode are used" << endl; } } } return private_mapping; } Unicode unicode_from_font (CharCode code, GfxFont * font) { if(!font->isCIDFont()) { char * cname = dynamic_cast(font)->getCharName(code); // may be untranslated ligature if(cname) { Unicode ou = globalParams->mapNameToUnicode(cname); if(isLegalUnicode(ou)) return ou; } } return map_to_private(code); } Unicode check_unicode(Unicode * u, int len, CharCode code, GfxFont * font) { if(len == 0) return map_to_private(code); if(len == 1) { if(isLegalUnicode(*u)) return *u; } return unicode_from_font(code, font); } } //namespace pdf2htmlEX