From 1ac17806967d1bef9b10dcd43450ddb02da3141d Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Fri, 17 Aug 2012 15:25:10 +0800 Subject: [PATCH] add a function detecting illegal unicode in html --- src/util.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/util.h b/src/util.h index 92c9c5e..1e7a740 100644 --- a/src/util.h +++ b/src/util.h @@ -43,6 +43,28 @@ static inline bool _tm_equal(const double * tm1, const double * tm2, int size = return true; } +/* + * http://en.wikipedia.org/wiki/HTML_decimal_character_rendering + */ +static inline bool isLegalUnicode(Unicode u) +{ + /* + if((u == 9) || (u == 10) || (u == 13)) + return true; + */ + + if(u <= 31) + return false; + + if((u >= 127) && (u <= 159)) + return false; + + if((u >= 0xd800) && (u <= 0xdfff)) + return false; + + return true; +} + static inline void outputUnicodes(std::ostream & out, const Unicode * u, int uLen) { for(int i = 0; i < uLen; ++i)