Guitar
unicode.h
Go to the documentation of this file.
1 
2 #ifndef UNICODE_H_
3 #define UNICODE_H_
4 
5 #include <cstring>
6 #include <cstdlib>
7 #include <cstdint>
8 #include <functional>
9 
10 namespace unicode_helper_ {
11 
// Members of unicode_helper_::utf8_reader_state_t. The opening line of the
// definition (unicode.h:12) is missing from this listing.
// NOTE(review): the meaning of `a` and `b` is not visible here; the state is
// passed to clear_state() and decode_utf8() in unicode.cpp - confirm there.
13  int a;
14  uint32_t b;
15 };
16 
// Decoder over a `char const *` buffer that hands out uint32_t values one at
// a time via next() and reports a position via offset().
// The constructor and next() are defined in unicode.cpp.
17 class utf8decoder {
18 private:
 // Presumably the [begin, end) input range passed to the constructor
 // (same parameter names) - confirm in unicode.cpp.
19  char const *begin;
20  char const *end;
 // Value reported by offset().
21  size_t pos;
22 
 // NOTE: source line 23 (`utf8_reader_state_t s;`, per the symbol index) is
 // missing from this listing.
24 public:
25  utf8decoder(char const *begin, char const *end);
 // Returns a uint32_t; presumably the next decoded code point - the
 // end-of-input behavior is defined in unicode.cpp.
26  uint32_t next();
 // Returns the current value of `pos`.
27  size_t offset() const
28  {
29  return pos;
30  }
31 };
32 
33 }
34 
36 
// Encoder that is constructed from an abstract_unicode_reader and yields its
// output one `char` at a time through get(). All member functions are
// defined in unicode.cpp.
37 class utf8encoder {
38 private:
 // Working state of the encoder.
 // NOTE: source line 40 (`abstract_unicode_reader *reader;`, per the symbol
 // index) is missing from this listing.
39  struct internal_data {
41  char c;
 // Presumably holds the encoded bytes of one code point, with `pos`/`len`
 // as cursor and fill count - confirm in unicode.cpp.
42  char buf[8];
43  int pos, len;
44  } data;
45 
 // Defined in unicode.cpp.
46  class internal_writer;
47 
 // Private helpers; their contracts are defined in unicode.cpp.
48  void set(abstract_unicode_reader *reader);
49  bool next();
50  inline bool next_();
51 public:
 // `reader` defaults to nullptr.
52  utf8encoder(abstract_unicode_reader *reader = nullptr);
 // Returns one char of output; abstract_unicode_reader::to_utf8() treats a
 // return value of 0 as end of output.
53  char get();
 // Returns an int position; to_utf8() samples it before each get().
54  int pos() const;
55 };
56 
// Encoder that is constructed from an abstract_unicode_reader and yields its
// output one uint16_t unit at a time through get(). All member functions are
// defined in unicode.cpp.
57 class utf16encoder {
58 private:
 // Working state of the encoder.
 // NOTE: source line 60 (`abstract_unicode_reader *reader;`, per the symbol
 // index) is missing from this listing.
59  struct internal_data {
61  uint16_t c;
 // Presumably holds the encoded units of one code point (two slots, enough
 // for a surrogate pair), with `pos`/`len` as cursor and fill count -
 // confirm in unicode.cpp.
62  uint16_t buf[2];
63  int pos, len;
64  } data;
65 
 // Defined in unicode.cpp.
66  class internal_writer;
67 
 // Private helpers; their contracts are defined in unicode.cpp.
68  void set(abstract_unicode_reader *reader);
69  bool next();
70  bool next_();
71 public:
 // `reader` defaults to nullptr.
72  utf16encoder(abstract_unicode_reader *reader = nullptr);
 // Returns one uint16_t of output; abstract_unicode_reader::to_utf16()
 // treats a return value of 0 as end of output.
73  uint16_t get();
74 };
75 
76 
// Public virtual interface of abstract_unicode_reader. The class header
// (unicode.h:77) is missing from this listing.
78 public:
 // Virtual destructor, defaulted.
79  virtual ~abstract_unicode_reader() = default;
 // Pure virtual: implementations return the next uint32_t value. The
 // to_utf32() helper of this class treats a return value of 0 as end of input.
80  virtual uint32_t next() = 0;
81 
// Runs a utf8encoder over this reader and passes each produced char to `fn`.
// The second argument to `fn` is the value of utf8encoder::pos() sampled
// before that char was fetched.
// The loop ends when the encoder returns 0 or when `fn` returns false, so
// `fn` is never called with a zero char.
82  void to_utf8(std::function<bool(char, int)> const &fn)
83  {
84  utf8encoder e(this);
85  while (1) {
 // pos() is read before get(), so it reflects the state prior to the fetch.
86  int pos = e.pos();
87  int c = e.get();
88  if (c == 0) break;
89  if (!fn((char)c, pos)) break;
90  }
91  }
// Runs a utf16encoder over this reader and passes each produced uint16_t
// unit to `fn`.
// The loop ends when the encoder returns 0 or when `fn` returns false, so
// `fn` is never called with a zero unit.
92  void to_utf16(std::function<bool(uint16_t)> const &fn)
93  {
94  utf16encoder e(this);
95  while (1) {
96  int c = e.get();
97  if (c == 0) break;
98  if (!fn((uint16_t)c)) break;
99  }
100  }
// Pulls values directly from next() and passes each one to `fn`.
// The loop ends when next() returns 0 or when `fn` returns false, so `fn`
// is never called with the value 0.
101  void to_utf32(std::function<bool(uint32_t)> const &fn)
102  {
103  while (1) {
104  uint32_t c = next();
105  if (c == 0) break;
106  if (!fn(c)) break;
107  }
108  }
109 };
110 
// Body of class utf32. The class header (unicode.h:111) is missing from this
// listing. next() is marked `override`, so the class derives from a base
// that declares a virtual next() - presumably abstract_unicode_reader.
112 private:
 // Pointers into a uint32_t buffer; presumably the current read position
 // and one-past-the-end - confirm in unicode.cpp.
113  struct {
114  uint32_t const *ptr;
115  uint32_t const *end;
116  } data;
117 public:
 // Three ways to supply the input: pointer pair, pointer only, and pointer
 // plus length. NOTE(review): the pointer-only form presumably expects a
 // zero-terminated buffer - confirm in unicode.cpp.
118  utf32(uint32_t const *ptr, uint32_t const *end);
119  utf32(uint32_t const *ptr);
120  utf32(uint32_t const *ptr, size_t len);
 // Overrides the base-class next(); defined in unicode.cpp.
121  uint32_t next() override;
122 };
123 
// Body of class utf16. The class header (unicode.h:124) is missing from this
// listing. next() is marked `override`, so the class derives from a base
// that declares a virtual next() - presumably abstract_unicode_reader.
125 private:
 // Pointers into a uint16_t buffer; presumably the current read position
 // and one-past-the-end - confirm in unicode.cpp.
126  struct {
127  uint16_t const *ptr;
128  uint16_t const *end;
129  } data;
130 public:
 // Three ways to supply the input: pointer pair, pointer only, and pointer
 // plus length. NOTE(review): the pointer-only form presumably expects a
 // zero-terminated buffer - confirm in unicode.cpp.
131  utf16(uint16_t const *ptr, uint16_t const *end);
132  utf16(uint16_t const *ptr);
133  utf16(uint16_t const *ptr, size_t len);
 // Overrides the base-class next() and returns a uint32_t; defined in
 // unicode.cpp.
134  uint32_t next() override;
135 };
136 
// Body of class utf8. The class header (unicode.h:137) is missing from this
// listing. next() is marked `override`, so the class derives from a base
// that declares a virtual next() - presumably abstract_unicode_reader.
138 private:
 // NOTE: source line 139 (`unicode_helper_::utf8decoder reader;`, per the
 // symbol index) is missing from this listing; offset() below uses it.
140 public:
 // Three ways to supply the input: pointer pair, pointer only, and pointer
 // plus length. NOTE(review): the pointer-only form presumably expects a
 // NUL-terminated string - confirm in unicode.cpp.
141  utf8(char const *ptr, char const *end);
142  utf8(char const *ptr);
143  utf8(char const *ptr, size_t len);
 // Overrides the base-class next(); defined in unicode.cpp.
144  uint32_t next() override;
 // Forwards to the offset() of the `reader` member.
145  size_t offset() const
146  {
147  return reader.offset();
148  }
149 };
150 
151 
152 #endif
utf8encoder::internal_data::len
int len
Definition: unicode.h:43
utf16
Definition: unicode.h:124
utf8encoder::internal_data::reader
abstract_unicode_reader * reader
Definition: unicode.h:40
utf16encoder::internal_writer::len
int len
Definition: unicode.cpp:282
utf8encoder::utf8encoder
utf8encoder(abstract_unicode_reader *reader=nullptr)
Definition: unicode.cpp:223
unicode_helper_::utf8decoder::s
utf8_reader_state_t s
Definition: unicode.h:23
utf8encoder::data
struct utf8encoder::internal_data data
unicode_helper_::clear_state
void clear_state(utf8_reader_state_t *s)
Definition: unicode.cpp:24
utf8encoder::internal_writer::~internal_writer
~internal_writer() override=default
utf16::utf16
utf16(uint16_t const *ptr, uint16_t const *end)
Definition: unicode.cpp:168
utf16encoder::internal_writer::internal_writer
internal_writer(uint16_t *p)
Definition: unicode.cpp:284
utf16encoder::internal_data::buf
uint16_t buf[2]
Definition: unicode.h:62
utf16encoder::utf16encoder
utf16encoder(abstract_unicode_reader *reader=nullptr)
Definition: unicode.cpp:295
utf16encoder::set
void set(abstract_unicode_reader *reader)
Definition: unicode.cpp:300
utf8
Definition: unicode.h:137
utf16encoder::internal_writer::dst
uint16_t * dst
Definition: unicode.cpp:281
utf8encoder::internal_writer::len
int len
Definition: unicode.cpp:209
abstract_unicode_reader::to_utf8
void to_utf8(std::function< bool(char, int)> const &fn)
Definition: unicode.h:82
utf8::offset
size_t offset() const
Definition: unicode.h:145
abstract_unicode_reader::~abstract_unicode_reader
virtual ~abstract_unicode_reader()=default
unicode_helper_::writer16::put
virtual void put(int c)=0
utf16encoder::internal_data::len
int len
Definition: unicode.h:63
utf8encoder::internal_data
Definition: unicode.h:39
abstract_unicode_reader
Definition: unicode.h:77
utf16encoder::get
uint16_t get()
Definition: unicode.cpp:336
unicode_helper_::decode_utf8
int decode_utf8(utf8_reader_state_t *state, uint8_t c)
Definition: unicode.cpp:30
utf8encoder::internal_writer::internal_writer
internal_writer(char *p)
Definition: unicode.cpp:211
abstract_unicode_reader::next
virtual uint32_t next()=0
unicode_helper_::encode_utf16
void encode_utf16(writer16 *writer, uint32_t code)
Definition: unicode.cpp:103
utf8encoder::pos
int pos() const
Definition: unicode.cpp:272
utf8encoder::next
bool next()
Definition: unicode.cpp:246
unicode_helper_::utf8decoder::utf8decoder
utf8decoder(char const *begin, char const *end)
Definition: unicode.cpp:117
utf32::ptr
const uint32_t * ptr
Definition: unicode.h:114
unicode_helper_::writer8::put
virtual void put(int c)=0
abstract_unicode_reader::to_utf32
void to_utf32(std::function< bool(uint32_t)> const &fn)
Definition: unicode.h:101
utf8encoder::internal_writer
Definition: unicode.cpp:206
utf8::next
uint32_t next() override
Definition: unicode.cpp:361
utf16::ptr
const uint16_t * ptr
Definition: unicode.h:127
utf32::end
const uint32_t * end
Definition: unicode.h:115
utf8::utf8
utf8(char const *ptr, char const *end)
Definition: unicode.cpp:346
utf32
Definition: unicode.h:111
utf32::utf32
utf32(uint32_t const *ptr, uint32_t const *end)
Definition: unicode.cpp:140
utf16encoder::internal_writer
Definition: unicode.cpp:279
utf8encoder::set
void set(abstract_unicode_reader *reader)
Definition: unicode.cpp:228
unicode_helper_::utf8decoder::offset
size_t offset() const
Definition: unicode.h:27
unicode_helper_::utf8decoder::end
const char * end
Definition: unicode.h:20
unicode_helper_::writer8::~writer8
virtual ~writer8()=default
unicode_helper_::reader::get
virtual int get()=0
unicode_helper_
Definition: unicode.cpp:4
abstract_unicode_reader::to_utf16
void to_utf16(std::function< bool(uint16_t)> const &fn)
Definition: unicode.h:92
unicode_helper_::utf8decoder
Definition: unicode.h:17
utf16::data
struct utf16::@13 data
utf8encoder
Definition: unicode.h:37
utf16encoder::next
bool next()
Definition: unicode.cpp:318
unicode_helper_::utf8_reader_state_t::a
int a
Definition: unicode.h:13
utf16encoder
Definition: unicode.h:57
utf8encoder::internal_data::buf
char buf[8]
Definition: unicode.h:42
unicode_helper_::encode_utf8
void encode_utf8(writer8 *writer, uint32_t code)
Definition: unicode.cpp:68
unicode_helper_::utf8_reader_state_t::b
uint32_t b
Definition: unicode.h:14
utf16encoder::internal_data
Definition: unicode.h:59
utf8::reader
unicode_helper_::utf8decoder reader
Definition: unicode.h:139
utf8encoder::internal_data::pos
int pos
Definition: unicode.h:43
utf8encoder::internal_writer::dst
char * dst
Definition: unicode.cpp:208
utf8encoder::get
char get()
Definition: unicode.cpp:264
unicode_helper_::reader
Definition: unicode.cpp:6
utf8encoder::next_
bool next_()
Definition: unicode.cpp:235
utf32::next
uint32_t next() override
Definition: unicode.cpp:158
utf16encoder::data
struct utf16encoder::internal_data data
unicode_helper_::writer16
Definition: unicode.cpp:18
unicode.h
unicode_helper_::utf8decoder::begin
const char * begin
Definition: unicode.h:19
utf16::next
uint32_t next() override
Definition: unicode.cpp:186
utf8encoder::internal_writer::put
void put(int c) override
Definition: unicode.cpp:217
utf16encoder::internal_writer::~internal_writer
~internal_writer() override=default
unicode_helper_::writer16::~writer16
virtual ~writer16()=default
unicode_helper_::utf8_reader_state_t
Definition: unicode.h:12
utf16encoder::internal_data::pos
int pos
Definition: unicode.h:63
utf16::end
const uint16_t * end
Definition: unicode.h:128
utf16encoder::internal_data::reader
abstract_unicode_reader * reader
Definition: unicode.h:60
utf32::data
struct utf32::@12 data
utf16encoder::internal_data::c
uint16_t c
Definition: unicode.h:61
unicode_helper_::utf8decoder::next
uint32_t next()
Definition: unicode.cpp:125
utf16encoder::next_
bool next_()
Definition: unicode.cpp:307
unicode_helper_::utf8decoder::pos
size_t pos
Definition: unicode.h:21
utf16encoder::internal_writer::put
void put(int c) override
Definition: unicode.cpp:289
utf8encoder::internal_data::c
char c
Definition: unicode.h:41
unicode_helper_::reader::~reader
virtual ~reader()=default
unicode_helper_::writer8
Definition: unicode.cpp:12