Guitar
htmlencode.h
Go to the documentation of this file.
1 
2 #ifndef __HTMLENCODE_H
3 #define __HTMLENCODE_H
4 
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <string_view>
#include <vector>
9 
/// Byte-oriented HTML escaping and unescaping helpers.
///
/// Encoding replaces `& < > " '` with named entities and emits control bytes
/// (and, when `utf8through` is false, every byte >= 0x80) as decimal numeric
/// references `&#N;`, one reference per byte. Decoding reverses exactly that
/// scheme: a numeric reference yields a single byte, not a UTF-8 sequence.
class HtmlEncode {
private:
	/// True when byte value `c` is written as one of the five named entities.
	static bool has_named_entity(int c)
	{
		return c == '&' || c == '<' || c == '>' || c == '\"' || c == '\'';
	}

	/// True when byte value `c` (0..255) is written as a `&#N;` reference:
	/// ASCII control bytes other than TAB and LF, and bytes >= 0x80 unless
	/// `utf8through` asks for them to be copied verbatim.
	static bool needs_numeric_reference(int c, bool utf8through)
	{
		if (c < 0x80) {
			return c < 0x20 && c != '\t' && c != '\n';
		}
		return !utf8through;
	}

public:

	/// Appends the single character `c` to `*out`.
	static void vecprint(std::vector<char> *out, char c)
	{
		out->push_back(c);
	}

	/// Appends the NUL-terminated string `s` (without its terminator) to `*out`.
	static void vecprint(std::vector<char> *out, char const *s)
	{
		out->insert(out->end(), s, s + std::strlen(s));
	}

	/// Returns a view over the bytes of `vec` (an empty view for an empty vector).
	/// The view refers to the vector's storage; it does not copy.
	static std::string_view to_string(std::vector<char> const &vec)
	{
		if (vec.empty()) {
			return {};
		}
		return {vec.data(), vec.size()};
	}

	/// Escapes the bytes in [ptr, end) and appends the result to `*vec`.
	///
	/// @param ptr         first byte to encode
	/// @param end         one past the last byte to encode
	/// @param utf8through when true, bytes >= 0x80 are copied unchanged;
	///                    when false, each is written as `&#N;`
	/// @param vec         output buffer that receives the encoded bytes
	static void html_encode_(char const *ptr, char const *end, bool utf8through, std::vector<char> *vec)
	{
		for (; ptr < end; ptr++) {
			char const ch = *ptr;
			int const c = ch & 0xff; // work on 0..255 regardless of char signedness
			switch (c) {
			case '&':
				vecprint(vec, "&amp;");
				break;
			case '<':
				vecprint(vec, "&lt;");
				break;
			case '>':
				vecprint(vec, "&gt;");
				break;
			case '\"':
				vecprint(vec, "&quot;");
				break;
			case '\'':
				vecprint(vec, "&apos;");
				break;
			default:
				if (needs_numeric_reference(c, utf8through)) {
					char tmp[16];
					std::snprintf(tmp, sizeof(tmp), "&#%u;", static_cast<unsigned>(c));
					vecprint(vec, tmp);
				} else {
					vecprint(vec, ch);
				}
				break;
			}
		}
	}

	/// Unescapes the bytes in [ptr, end) and appends the result to `*vec`.
	///
	/// Recognised references are `&amp; &lt; &gt; &quot; &apos;` and decimal
	/// `&#N;` (N is parsed with atoi and stored as a single byte). The search
	/// for the terminating ';' never looks past `end`, so the input does not
	/// have to be NUL-terminated. An '&' with no ';' before `end`, or an
	/// unrecognised named reference, is copied to the output unchanged rather
	/// than discarded.
	static void html_decode_(char const *ptr, char const *end, std::vector<char> *vec)
	{
		while (ptr < end) {
			char const ch = *ptr++;
			if (ch != '&') {
				vecprint(vec, ch);
				continue;
			}
			// Bounded search: only bytes inside [ptr, end) may terminate the reference.
			char const *semi = static_cast<char const *>(
				std::memchr(ptr, ';', static_cast<size_t>(end - ptr)));
			if (!semi) {
				vecprint(vec, '&'); // stray ampersand: keep it and carry on
				continue;
			}
			std::string const name(ptr, semi);
			if (!name.empty() && name[0] == '#') {
				vecprint(vec, static_cast<char>(std::atoi(name.c_str() + 1)));
			} else if (name == "amp") {
				vecprint(vec, '&');
			} else if (name == "lt") {
				vecprint(vec, '<');
			} else if (name == "gt") {
				vecprint(vec, '>');
			} else if (name == "quot") {
				vecprint(vec, '\"');
			} else if (name == "apos") {
				vecprint(vec, '\'');
			} else {
				// Unknown reference: reproduce "&name;" verbatim instead of dropping it.
				vecprint(vec, '&');
				vec->insert(vec->end(), ptr, semi);
				vecprint(vec, ';');
			}
			ptr = semi + 1;
		}
	}

	/// Returns the escaped form of the bytes in [ptr, end); see html_encode_().
	static std::string html_encode(char const *ptr, char const *end, bool utf8through)
	{
		std::vector<char> vec;
		vec.reserve((end - ptr) * 2);
		html_encode_(ptr, end, utf8through, &vec);
		return std::string(vec.begin(), vec.end());
	}

	/// Returns the unescaped form of the bytes in [ptr, end); see html_decode_().
	static std::string html_decode(char const *ptr, char const *end)
	{
		std::vector<char> vec;
		vec.reserve((end - ptr) * 2);
		html_decode_(ptr, end, &vec);
		return std::string(vec.begin(), vec.end());
	}

	/// Escapes the `len` bytes starting at `ptr`.
	static std::string html_encode(char const *ptr, size_t len, bool utf8through)
	{
		return html_encode(ptr, ptr + len, utf8through);
	}

	/// Unescapes the `len` bytes starting at `ptr`.
	static std::string html_decode(char const *ptr, size_t len)
	{
		return html_decode(ptr, ptr + len);
	}

	/// Escapes the NUL-terminated string `ptr`.
	static std::string html_encode(char const *ptr, bool utf8through)
	{
		return html_encode(ptr, std::strlen(ptr), utf8through);
	}

	/// Unescapes the NUL-terminated string `ptr`.
	static std::string html_decode(char const *ptr)
	{
		return html_decode(ptr, std::strlen(ptr));
	}

	/// Escapes `str`, returning a plain copy when nothing needs escaping.
	///
	/// The "nothing to do" scan uses the same rules as html_encode_(), so the
	/// result is identical to the pointer overloads for every `utf8through`
	/// value (including bytes >= 0x80 when `utf8through` is false).
	static std::string html_encode(std::string_view const &str, bool utf8through)
	{
		char const *begin = str.data();
		char const *end = begin + str.size();
		char const *ptr = begin;
		while (ptr < end) {
			int const c = static_cast<unsigned char>(*ptr);
			if (has_named_entity(c) || needs_numeric_reference(c, utf8through)) {
				break;
			}
			ptr++;
		}
		if (ptr == end) {
			return std::string(str);
		}
		std::vector<char> vec;
		vec.reserve(str.size() * 2);
		vec.insert(vec.end(), begin, ptr); // prefix is known to need no escaping
		html_encode_(ptr, end, utf8through, &vec);
		return std::string(vec.begin(), vec.end());
	}

	/// Unescapes `str`, returning a plain copy when it contains no '&'.
	static std::string html_decode(std::string_view const &str)
	{
		char const *begin = str.data();
		char const *end = begin + str.size();
		char const *ptr = begin;
		while (ptr < end && *ptr != '&') {
			ptr++;
		}
		if (ptr == end) {
			return std::string(str);
		}
		std::vector<char> vec;
		vec.reserve(str.size() * 2);
		vec.insert(vec.end(), begin, ptr);
		html_decode_(ptr, end, &vec);
		// Built from iterators so an empty result never dereferences vec[0].
		return std::string(vec.begin(), vec.end());
	}

};
185 
186 static inline std::string html_encode(char const *ptr, char const *end, bool utf8through = true)
187 {
188  return HtmlEncode::html_encode(ptr, end, utf8through);
189 }
190 
191 static inline std::string html_decode(char const *ptr, char const *end)
192 {
193  return HtmlEncode::html_decode(ptr, end);
194 }
195 
196 static inline std::string html_encode(char const *ptr, size_t len, bool utf8through = true)
197 {
198  return HtmlEncode::html_encode(ptr, ptr + len, utf8through);
199 }
200 
201 static inline std::string html_decode(char const *ptr, size_t len)
202 {
203  return HtmlEncode::html_decode(ptr, ptr + len);
204 }
205 
206 static inline std::string html_encode(char const *ptr, bool utf8through = true)
207 {
208  return HtmlEncode::html_encode(ptr, strlen(ptr), utf8through);
209 }
210 
211 static inline std::string html_decode(char const *ptr)
212 {
213  return HtmlEncode::html_decode(ptr, strlen(ptr));
214 }
215 
216 static inline std::string html_encode(std::string_view const &str, bool utf8through = true)
217 {
218  return HtmlEncode::html_encode(str, utf8through);
219 }
220 
221 static inline std::string html_decode(std::string_view const &str)
222 {
223  return HtmlEncode::html_decode(str);
224 }
225 
226 #endif
Definition: htmlencode.h:10
static void vecprint(std::vector< char > *out, char const *s)
Definition: htmlencode.h:18
static std::string html_encode(std::string_view const &str, bool utf8through)
Definition: htmlencode.h:136
static std::string html_decode(char const *ptr, char const *end)
Definition: htmlencode.h:108
static std::string_view to_string(std::vector< char > const &vec)
Definition: htmlencode.h:23
static std::string html_encode(char const *ptr, size_t len, bool utf8through)
Definition: htmlencode.h:116
static std::string html_decode(std::string_view const &str)
Definition: htmlencode.h:160
static void vecprint(std::vector< char > *out, char c)
Definition: htmlencode.h:13
static std::string html_decode(char const *ptr)
Definition: htmlencode.h:131
static std::string html_encode(char const *ptr, char const *end, bool utf8through)
Definition: htmlencode.h:100
static void html_decode_(char const *ptr, char const *end, std::vector< char > *vec)
Definition: htmlencode.h:68
static std::string html_encode(char const *ptr, bool utf8through)
Definition: htmlencode.h:126
static std::string html_decode(char const *ptr, size_t len)
Definition: htmlencode.h:121
static void html_encode_(char const *ptr, char const *end, bool utf8through, std::vector< char > *vec)
Definition: htmlencode.h:31
static std::string html_encode(char const *ptr, char const *end, bool utf8through=true)
Definition: htmlencode.h:186
static std::string html_decode(char const *ptr, char const *end)
Definition: htmlencode.h:191