tree-gen
C++ code generator for tree structures
tree-cbor.cpp.inc
Go to the documentation of this file.
1 #include <fstream>
2 #include <sstream>
3 #include <stdexcept>
4 #include <cstring>
5 
6 TREE_NAMESPACE_BEGIN
7 namespace cbor {
8 
13 Reader::Reader(const std::string &data) : Reader(std::string(data)) {}
14 
19 Reader::Reader(std::string &&data) :
20  data(std::make_shared<std::string>(std::forward<std::string>(data))),
21  slice_offset(0),
22  slice_length(this->data->size())
23 {
24  if (!slice_length) {
25  throw TREE_RUNTIME_ERROR("invalid CBOR: zero-size object");
26  }
27  check();
28 }
29 
33 Reader::Reader(const Reader &parent, size_t offs, size_t len) :
34  data(parent.data),
35  slice_offset(parent.slice_offset + offs),
36  slice_length(len)
37 {
38  if (slice_offset + slice_length > parent.slice_offset + parent.slice_length) {
39  throw TREE_RUNTIME_ERROR("invalid CBOR: trying to slice past extents of current slice");
40  }
41  if (this->slice_length == 0) {
42  throw TREE_RUNTIME_ERROR("invalid CBOR: trying to make an empty slice");
43  }
44 
45  // Seek past semantic tags.
46  uint8_t initial = read_at(0);
47  uint8_t type = initial >> 5u;
48  if (type == 6) {
49  size_t tag_len = 1;
50  read_intlike(initial & 0x1Fu, tag_len);
51  slice_offset += tag_len;
52  slice_length -= tag_len;
53  if (this->slice_length == 0) {
54  throw TREE_RUNTIME_ERROR("invalid CBOR: semantic tag has no value");
55  }
56  }
57 }
58 
62 Reader Reader::slice(size_t offset, size_t length) const {
63  return Reader(*this, offset, length);
64 }
65 
69 uint8_t Reader::read_at(size_t offset) const {
70  if (offset >= slice_length) {
71  throw TREE_RUNTIME_ERROR("invalid CBOR: trying to read past extents of current slice");
72  }
73  return data->at(this->slice_offset + offset);
74 }
75 
82 uint64_t Reader::read_intlike(uint8_t info, size_t &offset) const {
83 
84  // Info less than 24 is a shorthand for the integer itself.
85  if (info < 24u) return info;
86 
87  // Info greater than or equal to 24 is either illegal or a big-endian
88  // integer immediately following the initial byte. So read said integer.
89  uint64_t value = read_at(offset++);
90  if (info < 25u) return value;
91  value <<= 8u; value |= read_at(offset++);
92  if (info < 26u) return value;
93  value <<= 8u; value |= read_at(offset++);
94  value <<= 8u; value |= read_at(offset++);
95  if (info < 27u) return value;
96  value <<= 8u; value |= read_at(offset++);
97  value <<= 8u; value |= read_at(offset++);
98  value <<= 8u; value |= read_at(offset++);
99  value <<= 8u; value |= read_at(offset++);
100  if (info < 28u) return value;
101 
102  // Info greater than or equal to 28 is illegal. Note that 31 is used for
103  // indefinite lengths, so this must be checked prior to calling this method.
104  throw TREE_RUNTIME_ERROR("invalid CBOR: illegal additional info for integer or object length");
105 
106 }
107 
114 void Reader::read_stringlike(size_t &offset, std::ostream &s) const {
115  uint8_t info = read_at(offset++) & 0x1Fu;
116  if (info == 31) {
117 
118  // Handle indefinite length strings. These are just a break-terminated
119  // list of definite-length strings, so we can just call ourselves for
120  // the child objects.
121  while (read_at(offset) != 0xFF) {
122  read_stringlike(offset, s);
123  }
124  offset++;
125 
126  } else {
127 
128  // Handle definite-length strings.
129  uint64_t length = read_intlike(info, offset);
130  if (length + offset > this->slice_length) {
131  throw TREE_RUNTIME_ERROR("Invalid CBOR: string read past end of slice");
132  }
133  s.write(data->data() + this->slice_offset + offset, length);
134  offset += length;
135 
136  }
137 }
138 
143 void Reader::check_and_seek(size_t &offset) const {
144 
145  // Read the initial byte.
146  uint8_t initial = read_at(offset++);
147  uint8_t type = initial >> 5u;
148  uint8_t info = initial & 0x1Fu;
149 
150  // Handle major types 0 through 6.
151  switch (type) {
152  case 0: // unsigned integer
153  case 1: // negative integer
154  read_intlike(info, offset);
155  return;
156 
157  case 2: // byte string
158  case 3: // UTF8 string
159 
160  // Handle indefinite length strings.
161  if (info == 31) {
162 
163  // Indefinite strings consist of a break-terminated (0xFF) list
164  // of definite-length strings of the same type.
165  uint8_t sub_initial;
166  while ((sub_initial = read_at(offset++)) != 0xFF) {
167  uint8_t sub_type = sub_initial >> 5u;
168  uint8_t sub_info = sub_initial & 0x1Fu;
169  if (sub_type != type) {
170  throw TREE_RUNTIME_ERROR("invalid CBOR: illegal indefinite-length string component");
171  }
172 
173  // Seek past definite-length string component. The size in
174  // bytes is encoded as an integer.
175  offset += read_intlike(sub_info, offset);
176 
177  }
178 
179  return;
180  }
181 
182  // Seek past definite-length string. The size in bytes is
183  // encoded as an integer.
184  offset += read_intlike(info, offset);
185  return;
186 
187  case 4: // array
188  case 5: // map
189 
190  // Handle indefinite length arrays and maps.
191  if (info == 31) {
192 
193  // Read objects/object pairs until we encounter a break.
194  while (read_at(offset) != 0xFF) {
195  if (type == 5) check_and_seek(offset);
196  check_and_seek(offset);
197  }
198 
199  // Seek past the break.
200  offset++;
201 
202  return;
203  }
204 
205  // Handle definite-length arrays and maps. The amount of
206  // objects/object pairs is encoded as an integer.
207  for (uint64_t size = read_intlike(info, offset); size--;) {
208  if (type == 5) check_and_seek(offset);
209  check_and_seek(offset);
210  }
211  return;
212 
213  case 6: // semantic tag
214 
215  // We don't use semantic tags for anything, but ignoring them is
216  // legal and reading past them is easy enough.
217  read_intlike(info, offset);
218  check_and_seek(offset);
219  return;
220 
221  default:
222  break;
223  }
224 
225  // Handle major type 7. Here, the type is defined by the additional info.
226  // Additional info 24 is reserved for having the type specified by the next
227  // byte, but all such values are unassigned.
228  switch (info) {
229  case 20: // false
230  case 21: // true
231  case 22: // null
232  // Simple value with no additional data, we're already done.
233  return;
234 
235  case 23: // undefined
236  throw TREE_RUNTIME_ERROR("invalid CBOR: undefined value is not supported");
237 
238  case 25: // half-precision float
239  throw TREE_RUNTIME_ERROR("invalid CBOR: half-precision float is not supported");
240 
241  case 26: // single-precision float
242  throw TREE_RUNTIME_ERROR("invalid CBOR: single-precision float is not supported");
243 
244  case 27: // double-precision float
245  offset += 8;
246  return;
247 
248  case 31: // break
249  throw TREE_RUNTIME_ERROR("invalid CBOR: unexpected break");
250 
251  default:
252  break;
253  }
254 
255  throw TREE_RUNTIME_ERROR("invalid CBOR: unknown type code");
256 }
257 
262 void Reader::check() const {
263  size_t offset = 0u;
264  check_and_seek(offset);
265  if (offset != this->slice_length) {
266  throw TREE_RUNTIME_ERROR("invalid CBOR: garbage at end of outer object or multiple objects");
267  }
268 }
269 
283 const char *Reader::get_type_name() const {
284  uint8_t initial = read_at(0);
285  uint8_t type = initial >> 5u;
286  uint8_t info = initial & 0x1Fu;
287  switch (type) {
288  case 0: case 1: return "integer";
289  case 2: return "binary string";
290  case 3: return "UTF8 string";
291  case 4: return "array";
292  case 5: return "map";
293  case 7:
294  switch (info) {
295  case 20: case 21: return "boolean";
296  case 22: return "null";
297  case 27: return "float";
298  default: break;
299  }
300  default: break;
301  }
302  return "unknown type";
303 }
304 
308 bool Reader::is_null() const {
309  return read_at(0) == 0xF6;
310 }
311 
316 void Reader::as_null() const {
317  if (!is_null()) {
318  throw TREE_RUNTIME_ERROR(
319  "unexpected CBOR structure: expected null but found "
320  + std::string(get_type_name()));
321  }
322 }
323 
327 bool Reader::is_bool() const {
328  return (read_at(0) & 0xFEu) == 0xF4;
329 }
330 
335 bool Reader::as_bool() const {
336  switch (read_at(0)) {
337  case 0xF4: return false;
338  case 0xF5: return true;
339  }
340  throw TREE_RUNTIME_ERROR(
341  "unexpected CBOR structure: expected boolean but found "
342  + std::string(get_type_name()));
343 }
344 
348 bool Reader::is_int() const {
349  return (read_at(0) & 0xC0u) == 0;
350 }
351 
357 int64_t Reader::as_int() const {
358  uint8_t initial = read_at(0);
359  uint8_t type = initial >> 5u;
360  if (type >= 2) {
361  throw TREE_RUNTIME_ERROR(
362  "unexpected CBOR structure: expected integer but found "
363  + std::string(get_type_name()));
364  }
365  uint8_t info = initial & 0x1Fu;
366  size_t offset = 1;
367  uint64_t value = read_intlike(info, offset);
368  if (value >= 0x8000000000000000ull) {
369  throw TREE_RUNTIME_ERROR("CBOR integer out of int64 range");
370  }
371  if (type == 0) {
372  return (int64_t)value;
373  } else {
374  return -1 - (int64_t)value;
375  }
376 }
377 
382 bool Reader::is_float() const {
383  return read_at(0) == 0xFBu;
384 }
385 
391 double Reader::as_float() const {
392  if (!is_float()) {
393  throw TREE_RUNTIME_ERROR(
394  "unexpected CBOR structure: expected float but found "
395  + std::string(get_type_name()));
396  }
397  size_t offset = 1;
398  uint64_t value = read_intlike(27, offset);
399  double retval = 0.0;
400  memcpy(&retval, &value, sizeof(retval));
401  return retval;
402 }
403 
407 bool Reader::is_string() const {
408  return (read_at(0) & 0xE0u) == 0x60u;
409 }
410 
416 std::string Reader::as_string() const {
417  if (!is_string()) {
418  throw TREE_RUNTIME_ERROR(
419  "unexpected CBOR structure: expected UTF8 string but found "
420  + std::string(get_type_name()));
421  }
422  std::ostringstream ss;
423  size_t offset = 0;
424  read_stringlike(offset, ss);
425  return ss.str();
426 }
427 
431 bool Reader::is_binary() const {
432  return (read_at(0) & 0xE0u) == 0x40u;
433 }
434 
440 std::string Reader::as_binary() const {
441  if (!is_binary()) {
442  throw TREE_RUNTIME_ERROR(
443  "unexpected CBOR structure: expected binary string but found "
444  + std::string(get_type_name()));
445  }
446  std::ostringstream ss;
447  size_t offset = 0;
448  read_stringlike(offset, ss);
449  return ss.str();
450 }
451 
455 bool Reader::is_array() const {
456  return (read_at(0) & 0xE0u) == 0x80u;
457 }
458 
463 void Reader::read_array_item(size_t &offset, ArrayReader &ar) const {
464  size_t start = offset;
465  check_and_seek(offset);
466  ar.push_back(slice(start, offset - start));
467 }
468 
473 ArrayReader Reader::as_array() const {
474  if (!is_array()) {
475  throw TREE_RUNTIME_ERROR(
476  "unexpected CBOR structure: expected array but found "
477  + std::string(get_type_name()));
478  }
479 
480  uint8_t info = read_at(0) & 0x1Fu;
481  size_t offset = 1;
482  ArrayReader ar;
483 
484  if (info == 31) {
485 
486  // Handle indefinite length arrays.
487  while (read_at(offset) != 0xFF) {
488  read_array_item(offset, ar);
489  }
490 
491  } else {
492 
493  // Handle definite-length arrays and maps. The amount of
494  // objects/object pairs is encoded as an integer.
495  for (uint64_t size = read_intlike(info, offset); size--;) {
496  read_array_item(offset, ar);
497  }
498 
499  }
500 
501  return ar;
502 }
503 
507 bool Reader::is_map() const {
508  return (read_at(0) & 0xE0u) == 0xA0u;
509 }
510 
515 void Reader::read_map_item(size_t &offset, MapReader &map) const {
516  size_t key_start = offset;
517  check_and_seek(offset);
518  size_t data_start = offset;
519  check_and_seek(offset);
520  map.insert(std::make_pair(
521  slice(key_start, data_start - key_start).as_string(),
522  slice(data_start, offset - data_start)));
523 }
524 
529 MapReader Reader::as_map() const {
530  if (!is_map()) {
531  throw TREE_RUNTIME_ERROR(
532  "unexpected CBOR structure: expected map but found "
533  + std::string(get_type_name()));
534  }
535 
536  uint8_t info = read_at(0) & 0x1Fu;
537  size_t offset = 1;
538  MapReader map;
539 
540  if (info == 31) {
541 
542  // Handle indefinite length arrays.
543  while (read_at(offset) != 0xFFu) {
544  read_map_item(offset, map);
545  }
546 
547  } else {
548 
549  // Handle definite-length arrays and maps. The amount of
550  // objects/object pairs is encoded as an integer.
551  for (uint64_t size = read_intlike(info, offset); size--;) {
552  read_map_item(offset, map);
553  }
554 
555  }
556 
557  return map;
558 }
559 
563 std::string Reader::get_contents() const {
564  return data->substr(slice_offset, slice_length);
565 }
566 
570 StructureWriter::StructureWriter(Writer &writer) :
571  writer(&writer),
572  id(writer.id_counter)
573 {
574  writer.stack.push(id);
575  writer.id_counter++;
576 }
577 
582 std::ostream &StructureWriter::stream() {
583  if (!writer || writer->stack.empty() || writer->stack.top() != id) {
584  throw TREE_RUNTIME_ERROR("Attempt to write to CBOR object using inactive writer");
585  }
586  return writer->stream;
587 }
588 
592 void StructureWriter::write_null() {
593  uint8_t data = 0xF6;
594  stream().write(reinterpret_cast<char*>(&data), 1);
595 }
596 
600 void StructureWriter::write_bool(bool value) {
601  uint8_t data = value ? 0xF5 : 0xF4;
602  stream().write(reinterpret_cast<char*>(&data), 1);
603 }
604 
610 void StructureWriter::write_int(int64_t int_value, uint8_t major) {
611  uint64_t value;
612  if (int_value < 0) {
613  major = 1;
614  value = -1 - int_value;
615  } else {
616  value = int_value;
617  }
618  uint8_t data[9];
619  data[0] = major << 5u;
620  if (value < 24) {
621  data[0] |= value;
622  stream().write(reinterpret_cast<char*>(&data), 1);
623  } else if (value < 0x100ll) {
624  data[0] |= 24u;
625  data[1] = static_cast<uint8_t>(value);
626  stream().write(reinterpret_cast<char*>(&data), 2);
627  } else if (value < 0x10000ll) {
628  data[0] |= 25u;
629  data[1] = static_cast<uint8_t>(value >> 8u);
630  data[2] = static_cast<uint8_t>(value);
631  stream().write(reinterpret_cast<char*>(&data), 3);
632  } else if (value < 0x100000000ll) {
633  data[0] |= 26u;
634  data[1] = static_cast<uint8_t>(value >> 24u);
635  data[2] = static_cast<uint8_t>(value >> 16u);
636  data[3] = static_cast<uint8_t>(value >> 8u);
637  data[4] = static_cast<uint8_t>(value);
638  stream().write(reinterpret_cast<char*>(&data), 5);
639  } else {
640  data[0] |= 27u;
641  data[1] = static_cast<uint8_t>(value >> 56u);
642  data[2] = static_cast<uint8_t>(value >> 48u);
643  data[3] = static_cast<uint8_t>(value >> 40u);
644  data[4] = static_cast<uint8_t>(value >> 32u);
645  data[5] = static_cast<uint8_t>(value >> 24u);
646  data[6] = static_cast<uint8_t>(value >> 16u);
647  data[7] = static_cast<uint8_t>(value >> 8u);
648  data[8] = static_cast<uint8_t>(value);
649  stream().write(reinterpret_cast<char*>(&data), 9);
650  }
651 }
652 
656 void StructureWriter::write_float(double value) {
657  uint8_t data[9];
658  data[0] = 0xFB;
659  std::memcpy(data + 1, &value, 8);
660  std::swap(data[1], data[8]);
661  std::swap(data[2], data[7]);
662  std::swap(data[3], data[6]);
663  std::swap(data[4], data[5]);
664  stream().write(reinterpret_cast<char*>(&data), 9);
665 }
666 
670 void StructureWriter::write_string(const std::string &value) {
671  write_int(value.size(), 3);
672  stream().write(value.data(), value.size());
673 }
674 
678 void StructureWriter::write_binary(const std::string &value) {
679  write_int(value.size(), 2);
680  stream().write(value.data(), value.size());
681 }
682 
688 ArrayWriter StructureWriter::write_array() {
689  // Ensure that we're allowed to write.
690  stream();
691  return ArrayWriter(*writer);
692 }
693 
699 MapWriter StructureWriter::write_map() {
700  // Ensure that we're allowed to write.
701  stream();
702  return MapWriter(*writer);
703 }
704 
709 StructureWriter::~StructureWriter() {
710  if (writer && !writer->stack.empty() && writer->stack.top() == id) {
711  close();
712  }
713 }
714 
718 StructureWriter::StructureWriter(StructureWriter &&src) : writer(src.writer), id(src.id) {
719  src.writer = nullptr;
720  src.id = 0;
721 }
722 
726 StructureWriter &StructureWriter::operator=(StructureWriter &&src) {
727  if (writer && !writer->stack.empty() && writer->stack.top() == id) {
728  close();
729  }
730  writer = src.writer;
731  id = src.id;
732  src.writer = nullptr;
733  src.id = 0;
734  return *this;
735 }
736 
741 void StructureWriter::close() {
742  uint8_t data = 0xFF;
743  stream().write(reinterpret_cast<char*>(&data), 1);
744  writer->stack.pop();
745 }
746 
751 ArrayWriter::ArrayWriter(Writer &writer) : StructureWriter(writer) {
752  uint8_t data = 0x9F;
753  stream().write(reinterpret_cast<char*>(&data), 1);
754 }
755 
759 void ArrayWriter::append_null() {
760  write_null();
761 }
762 
766 void ArrayWriter::append_bool(bool value) {
767  write_bool(value);
768 }
769 
773 void ArrayWriter::append_int(int64_t value) {
774  write_int(value);
775 }
776 
780 void ArrayWriter::append_float(double value) {
781  write_float(value);
782 }
783 
787 void ArrayWriter::append_string(const std::string &value) {
788  write_string(value);
789 }
790 
794 void ArrayWriter::append_binary(const std::string &value) {
795  write_binary(value);
796 }
797 
803 ArrayWriter ArrayWriter::append_array() {
804  return write_array();
805 }
806 
812 MapWriter ArrayWriter::append_map() {
813  return write_map();
814 }
815 
820 MapWriter::MapWriter(Writer &writer) : StructureWriter(writer) {
821  uint8_t data = 0xBF;
822  stream().write(reinterpret_cast<char*>(&data), 1);
823 }
824 
828 void MapWriter::append_null(const std::string &key) {
829  write_string(key);
830  write_null();
831 }
832 
836 void MapWriter::append_bool(const std::string &key, bool value) {
837  write_string(key);
838  write_bool(value);
839 }
840 
844 void MapWriter::append_int(const std::string &key, int64_t value) {
845  write_string(key);
846  write_int(value);
847 }
848 
853 void MapWriter::append_float(const std::string &key, double value) {
854  write_string(key);
855  write_float(value);
856 }
857 
861 void MapWriter::append_string(const std::string &key, const std::string &value) {
862  write_string(key);
863  write_string(value);
864 }
865 
869 void MapWriter::append_binary(const std::string &key, const std::string &value) {
870  write_string(key);
871  write_binary(value);
872 }
873 
880 ArrayWriter MapWriter::append_array(const std::string &key) {
881  write_string(key);
882  return write_array();
883 }
884 
891 MapWriter MapWriter::append_map(const std::string &key) {
892  write_string(key);
893  return write_map();
894 }
895 
899 Writer::Writer(std::ostream &stream) : stream(stream), id_counter(1) {
900 }
901 
908 MapWriter Writer::start() {
909  if (!stack.empty()) {
910  throw TREE_RUNTIME_ERROR("Writing of this CBOR object has already started");
911  }
912  return MapWriter(*this);
913 }
914 
915 } // namespace cbor
916 TREE_NAMESPACE_END
STL namespace.