Guitar
unicode.h
Go to the documentation of this file.
1 
2 #ifndef UNICODE_H_
3 #define UNICODE_H_
4 
5 #include <cstring>
6 #include <cstdlib>
7 #include <cstdint>
8 #include <functional>
9 
10 namespace unicode_helper_ {
11 
13  int a;
14  uint32_t b;
15 };
16 
// utf8decoder (declared inside namespace unicode_helper_): keeps a begin/end
// pair of `char const *` plus a position counter that offset() exposes.
// NOTE(review): the name suggests next() yields one decoded code point per
// call; the definition lives in unicode.cpp, so confirm there.
// NOTE(review): listing line 23 is absent from this page; the member index at
// the bottom of the page attributes `utf8_reader_state_t s` to that line.
17 class utf8decoder {
18 private:
19  char const *begin;
20  char const *end;
21  size_t pos;
22 
24 public:
 // Defined in unicode.cpp; presumably records the input range given by
 // begin/end -- TODO confirm whether `end` is exclusive.
25  utf8decoder(char const *begin, char const *end);
 // Defined in unicode.cpp; what the returned uint32_t means (including any
 // end-of-input value) is not visible in this header.
26  uint32_t next();
 // Returns the current value of `pos`.
27  size_t offset() const
28  {
29  return pos;
30  }
31 };
32 
33 }
34 
36 
// utf8encoder: pulls from an abstract_unicode_reader and hands out `char`
// values one at a time through get(). abstract_unicode_reader::to_utf8 in this
// header drives it by calling pos() and then get() in a loop, and treats a 0
// returned from get() as the end of output.
37 class utf8encoder {
38 private:
 // Working state for the encoder.
 // NOTE(review): listing line 40 is absent from this page; the member index
 // at the bottom attributes `abstract_unicode_reader * reader` to it.
39  struct internal_data {
41  char c;
 // Staging buffer of 8 chars; `pos`/`len` below are presumably the read
 // cursor and fill level for it -- confirm in unicode.cpp.
42  char buf[8];
43  int pos, len;
44  } data;
45 
 // Forward declaration only; the class body is not in this header.
46  class internal_writer;
47 
 // Private helpers, all defined in unicode.cpp.
48  void set(abstract_unicode_reader *reader);
49  bool next();
50  inline bool next_();
51 public:
 // `reader` defaults to nullptr; how a null reader is handled is decided in
 // unicode.cpp and cannot be seen here.
52  utf8encoder(abstract_unicode_reader *reader = nullptr);
 // Returns the next output char; callers in this header stop on 0.
53  char get();
 // Position value that to_utf8 forwards to its callback as the second
 // argument; its exact meaning is defined in unicode.cpp.
54  int pos() const;
55 };
// utf16encoder: pulls from an abstract_unicode_reader and hands out uint16_t
// values one at a time through get(). abstract_unicode_reader::to_utf16 in
// this header drives it and treats a 0 returned from get() as the end of
// output.
57 class utf16encoder {
58 private:
 // Working state for the encoder.
 // NOTE(review): listing line 60 is absent from this page; the member index
 // at the bottom attributes `abstract_unicode_reader * reader` to it.
59  struct internal_data {
61  uint16_t c;
 // Two-unit staging buffer; presumably sized for a surrogate pair, with
 // `pos`/`len` as cursor and fill level -- confirm in unicode.cpp.
62  uint16_t buf[2];
63  int pos, len;
64  } data;
65 
 // Forward declaration only; the class body is not in this header.
66  class internal_writer;
67 
 // Private helpers, all defined in unicode.cpp.
68  void set(abstract_unicode_reader *reader);
69  bool next();
70  bool next_();
71 public:
 // `reader` defaults to nullptr; how a null reader is handled is decided in
 // unicode.cpp and cannot be seen here.
72  utf16encoder(abstract_unicode_reader *reader = nullptr);
 // Returns the next output unit; callers in this header stop on 0.
73  uint16_t get();
74 };
75 
76 
78 public:
 // Virtual destructor so derived readers can be destroyed through a base pointer.
79  virtual ~abstract_unicode_reader() = default;
 // Pure virtual source of uint32_t values. to_utf32 below stops as soon as
 // this returns 0, so implementations are expected to use 0 as the
 // end-of-input marker.
80  virtual uint32_t next() = 0;
81 
// Feeds this reader through a utf8encoder and passes each produced char to
// `fn` together with the encoder's pos() value sampled just before that char
// was fetched. The loop ends when get() returns 0 (that 0 is never delivered
// to `fn`) or when `fn` returns false.
82  void to_utf8(std::function<bool(char, int)> const &fn)
83  {
84  utf8encoder e(this);
85  while (1) {
 // pos() is read before get() so the callback sees the position that was
 // current prior to fetching this char.
86  int pos = e.pos();
87  int c = e.get();
88  if (c == 0) break;
89  if (!fn((char)c, pos)) break;
90  }
91  }
// Feeds this reader through a utf16encoder and passes each produced uint16_t
// to `fn`. The loop ends when get() returns 0 (that 0 is never delivered to
// `fn`) or when `fn` returns false.
92  void to_utf16(std::function<bool(uint16_t)> const &fn)
93  {
94  utf16encoder e(this);
95  while (1) {
96  int c = e.get();
97  if (c == 0) break;
98  if (!fn((uint16_t)c)) break;
99  }
100  }
// Passes every value returned by next() straight to `fn`, with no encoder in
// between. The loop ends when next() returns 0 (that 0 is never delivered to
// `fn`) or when `fn` returns false.
101  void to_utf32(std::function<bool(uint32_t)> const &fn)
102  {
103  while (1) {
104  uint32_t c = next();
105  if (c == 0) break;
106  if (!fn(c)) break;
107  }
108  }
109 };
110 
112 private:
 // State of the utf32 reader: a pair of pointers into uint32_t input.
 // NOTE(review): the class header (listing line 111) is absent from this
 // page; `next() override` below implies a base class with a virtual next(),
 // presumably abstract_unicode_reader -- confirm.
113  struct {
114  uint32_t const *ptr;
115  uint32_t const *end;
116  } data;
117 public:
 // Three ways to describe the input, all defined in unicode.cpp:
 //  (ptr, end) -- pointer pair; presumably `end` is one past the last unit.
 //  (ptr)      -- no length given; presumably reads up to a zero unit.
 //  (ptr, len) -- presumably `len` counts uint32_t units, not bytes.
118  utf32(uint32_t const *ptr, uint32_t const *end);
119  utf32(uint32_t const *ptr);
120  utf32(uint32_t const *ptr, size_t len);
 // Defined in unicode.cpp.
121  uint32_t next() override;
122 };
123 
125 private:
 // State of the utf16 reader: a pair of pointers into uint16_t input.
 // NOTE(review): the class header (listing line 124) is absent from this
 // page; `next() override` below implies a base class with a virtual next(),
 // presumably abstract_unicode_reader -- confirm.
126  struct {
127  uint16_t const *ptr;
128  uint16_t const *end;
129  } data;
130 public:
 // Three ways to describe the input, all defined in unicode.cpp:
 //  (ptr, end) -- pointer pair; presumably `end` is one past the last unit.
 //  (ptr)      -- no length given; presumably reads up to a zero unit.
 //  (ptr, len) -- presumably `len` counts uint16_t units, not bytes.
131  utf16(uint16_t const *ptr, uint16_t const *end);
132  utf16(uint16_t const *ptr);
133  utf16(uint16_t const *ptr, size_t len);
 // Defined in unicode.cpp; returns uint32_t although the input units are
 // uint16_t, so it presumably combines surrogate pairs -- confirm.
134  uint32_t next() override;
135 };
136 
138 private:
140 public:
 // Public interface of the utf8 reader.
 // NOTE(review): the class header (listing line 137) and member line 139 are
 // absent from this page; the member index at the bottom attributes
 // `unicode_helper_::utf8decoder reader` to line 139.
 // Three ways to describe the input, all defined in unicode.cpp:
 //  (ptr, end) -- pointer pair; presumably `end` is one past the last byte.
 //  (ptr)      -- no length given; presumably reads up to a NUL byte.
 //  (ptr, len) -- presumably `len` is a byte count.
141  utf8(char const *ptr, char const *end);
142  utf8(char const *ptr);
143  utf8(char const *ptr, size_t len);
 // Defined in unicode.cpp.
144  uint32_t next() override;
 // Forwards to reader.offset(); utf8decoder::offset() in this header returns
 // the decoder's `pos` member.
145  size_t offset() const
146  {
147  return reader.offset();
148  }
150 
151 
152 #endif
Definition: unicode.h:77
virtual uint32_t next()=0
virtual ~abstract_unicode_reader()=default
void to_utf8(std::function< bool(char, int)> const &fn)
Definition: unicode.h:82
void to_utf16(std::function< bool(uint16_t)> const &fn)
Definition: unicode.h:92
void to_utf32(std::function< bool(uint32_t)> const &fn)
Definition: unicode.h:101
Definition: unicode.h:17
char const * end
Definition: unicode.h:20
utf8decoder(char const *begin, char const *end)
Definition: unicode.cpp:117
char const * begin
Definition: unicode.h:19
utf8_reader_state_t s
Definition: unicode.h:23
uint32_t next()
Definition: unicode.cpp:125
size_t offset() const
Definition: unicode.h:27
size_t pos
Definition: unicode.h:21
Definition: unicode.h:124
uint16_t const * end
Definition: unicode.h:128
struct utf16::@14 data
utf16(uint16_t const *ptr, uint16_t const *end)
Definition: unicode.cpp:168
uint16_t const * ptr
Definition: unicode.h:127
uint32_t next() override
Definition: unicode.cpp:186
Definition: unicode.h:57
uint16_t get()
Definition: unicode.cpp:336
utf16encoder(abstract_unicode_reader *reader=nullptr)
Definition: unicode.cpp:295
bool next_()
Definition: unicode.cpp:307
void set(abstract_unicode_reader *reader)
Definition: unicode.cpp:300
struct utf16encoder::internal_data data
bool next()
Definition: unicode.cpp:318
Definition: unicode.h:111
uint32_t next() override
Definition: unicode.cpp:158
uint32_t const * ptr
Definition: unicode.h:114
uint32_t const * end
Definition: unicode.h:115
utf32(uint32_t const *ptr, uint32_t const *end)
Definition: unicode.cpp:140
struct utf32::@13 data
Definition: unicode.h:137
unicode_helper_::utf8decoder reader
Definition: unicode.h:139
size_t offset() const
Definition: unicode.h:145
uint32_t next() override
Definition: unicode.cpp:361
utf8(char const *ptr, char const *end)
Definition: unicode.cpp:346
Definition: unicode.h:37
int pos() const
Definition: unicode.cpp:272
bool next()
Definition: unicode.cpp:246
bool next_()
Definition: unicode.cpp:235
char get()
Definition: unicode.cpp:264
void set(abstract_unicode_reader *reader)
Definition: unicode.cpp:228
utf8encoder(abstract_unicode_reader *reader=nullptr)
Definition: unicode.cpp:223
struct utf8encoder::internal_data data
Definition: unicode.cpp:4
Definition: unicode.h:12
uint32_t b
Definition: unicode.h:14
int a
Definition: unicode.h:13
Definition: unicode.h:59
int pos
Definition: unicode.h:63
uint16_t c
Definition: unicode.h:61
abstract_unicode_reader * reader
Definition: unicode.h:60
uint16_t buf[2]
Definition: unicode.h:62
int len
Definition: unicode.h:63
Definition: unicode.h:39
abstract_unicode_reader * reader
Definition: unicode.h:40
char buf[8]
Definition: unicode.h:42
int len
Definition: unicode.h:43
int pos
Definition: unicode.h:43
char c
Definition: unicode.h:41