libpqxx
The C++ client library for PostgreSQL
array.hxx
1/* Handling of SQL arrays.
2 *
3 * DO NOT INCLUDE THIS FILE DIRECTLY; include pqxx/field instead.
4 *
5 * Copyright (c) 2000-2024, Jeroen T. Vermeulen.
6 *
7 * See COPYING for copyright license. If you did not receive a file called
8 * COPYING with this source code, please notify the distributor of this
9 * mistake, or contact the author.
10 */
11#ifndef PQXX_H_ARRAY
12#define PQXX_H_ARRAY
13
14#if !defined(PQXX_HEADER_PRE)
15# error "Include libpqxx headers as <pqxx/header>, not <pqxx/header.hxx>."
16#endif
17
18#include <algorithm>
19#include <cassert>
20#include <stdexcept>
21#include <string>
22#include <type_traits>
23#include <utility>
24#include <vector>
25
26#include "pqxx/connection.hxx"
27#include "pqxx/internal/array-composite.hxx"
28#include "pqxx/internal/encoding_group.hxx"
29#include "pqxx/internal/encodings.hxx"
30
31
32namespace pqxx
33{
34// TODO: Specialise for string_view/zview, allocate all strings in one buffer.
35
37
52template<
53 typename ELEMENT, std::size_t DIMENSIONS = 1u,
54 char SEPARATOR = array_separator<ELEMENT>>
55class array final
56{
57public:
59
68 array(std::string_view data, connection const &conn) :
69 array{data, pqxx::internal::enc_group(conn.encoding_id())}
70 {}
71
73
75 constexpr std::size_t dimensions() noexcept { return DIMENSIONS; }
76
78
82 std::array<std::size_t, DIMENSIONS> const &sizes() noexcept
83 {
84 return m_extents;
85 }
86
87 template<typename... INDEX> ELEMENT const &at(INDEX... index) const
88 {
89 static_assert(sizeof...(index) == DIMENSIONS);
90 check_bounds(index...);
91 return m_elts.at(locate(index...));
92 }
93
95
103 template<typename... INDEX> ELEMENT const &operator[](INDEX... index) const
104 {
105 static_assert(sizeof...(index) == DIMENSIONS);
106 return m_elts[locate(index...)];
107 }
108
110
115 constexpr auto cbegin() const noexcept { return m_elts.cbegin(); }
117 constexpr auto cend() const noexcept { return m_elts.cend(); }
119 constexpr auto crbegin() const noexcept { return m_elts.crbegin(); }
121 constexpr auto crend() const noexcept { return m_elts.crend(); }
122
124
127 constexpr std::size_t size() const noexcept { return m_elts.size(); }
128
130
145 constexpr auto ssize() const noexcept
146 {
147 return static_cast<std::ptrdiff_t>(size());
148 }
149
151
153 constexpr auto front() const noexcept { return m_elts.front(); }
154
156
158 constexpr auto back() const noexcept { return m_elts.back(); }
159
160private:
162
170 void check_dims(std::string_view data)
171 {
172 auto sz{std::size(data)};
173 if (sz < DIMENSIONS * 2)
174 throw conversion_error{pqxx::internal::concat(
175 "Trying to parse a ", DIMENSIONS, "-dimensional array out of '", data,
176 "'.")};
177
178 // Making some assumptions here:
179 // * The array holds no extraneous whitespace.
180 // * None of the sub-arrays can be null.
181 // * Only ASCII characters start off with a byte in the 0-127 range.
182 //
183 // Given those, the input must start with a sequence of DIMENSIONS bytes
184 // with the ASCII value for '{'; and likewise it must end with a sequence
185 // of DIMENSIONS bytes with the ASCII value for '}'.
186
187 if (data[0] != '{')
188 throw conversion_error{"Malformed array: does not start with '{'."};
189 for (std::size_t i{0}; i < DIMENSIONS; ++i)
190 if (data[i] != '{')
191 throw conversion_error{pqxx::internal::concat(
192 "Expecting ", DIMENSIONS, "-dimensional array, but found ", i, ".")};
193 if (data[DIMENSIONS] == '{')
194 throw conversion_error{pqxx::internal::concat(
195 "Tried to parse ", DIMENSIONS,
196 "-dimensional array from array data that has more dimensions.")};
197 for (std::size_t i{0}; i < DIMENSIONS; ++i)
198 if (data[sz - 1 - i] != '}')
199 throw conversion_error{
200 "Malformed array: does not end in the right number of '}'."};
201 }
202
203 explicit array(std::string_view data, pqxx::internal::encoding_group enc)
204 {
205 using group = pqxx::internal::encoding_group;
206 switch (enc)
207 {
208 case group::MONOBYTE: parse<group::MONOBYTE>(data); break;
209 case group::BIG5: parse<group::BIG5>(data); break;
210 case group::EUC_CN: parse<group::EUC_CN>(data); break;
211 case group::EUC_JP: parse<group::EUC_JP>(data); break;
212 case group::EUC_KR: parse<group::EUC_KR>(data); break;
213 case group::EUC_TW: parse<group::EUC_TW>(data); break;
214 case group::GB18030: parse<group::GB18030>(data); break;
215 case group::GBK: parse<group::GBK>(data); break;
216 case group::JOHAB: parse<group::JOHAB>(data); break;
217 case group::MULE_INTERNAL: parse<group::MULE_INTERNAL>(data); break;
218 case group::SJIS: parse<group::SJIS>(data); break;
219 case group::UHC: parse<group::UHC>(data); break;
220 case group::UTF8: parse<group::UTF8>(data); break;
221 default: PQXX_UNREACHABLE; break;
222 }
223 }
224
226
229 std::size_t parse_field_end(std::string_view data, std::size_t here) const
230 {
231 auto const sz{std::size(data)};
232 if (here < sz)
233 switch (data[here])
234 {
235 case SEPARATOR:
236 ++here;
237 if (here >= sz)
238 throw conversion_error{"Array looks truncated."};
239 switch (data[here])
240 {
241 case SEPARATOR:
242 throw conversion_error{"Array contains double separator."};
243 case '}': throw conversion_error{"Array contains trailing separator."};
244 default: break;
245 }
246 break;
247 case '}': break;
248 default:
249 throw conversion_error{pqxx::internal::concat(
250 "Unexpected character in array: ",
251 static_cast<unsigned>(static_cast<unsigned char>(data[here])),
252 " where separator or closing brace expected.")};
253 }
254 return here;
255 }
256
258
263 constexpr std::size_t estimate_elements(std::string_view data) const noexcept
264 {
265 // Dirty trick: just count the number of bytes that look as if they may be
266 // separators. At the very worst we may overestimate by a factor of two or
267 // so, in exceedingly rare cases, on some encodings.
268 auto const separators{
269 std::count(std::begin(data), std::end(data), SEPARATOR)};
270 // The number of dimensions makes no difference here. It's still one
271 // separator between consecutive elements, just possibly with some extra
272 // braces as well.
273 return static_cast<std::size_t>(separators + 1);
274 }
275
276 template<pqxx::internal::encoding_group ENC>
277 void parse(std::string_view data)
278 {
279 static_assert(DIMENSIONS > 0u, "Can't create a zero-dimensional array.");
280 auto const sz{std::size(data)};
281 check_dims(data);
282
283 m_elts.reserve(estimate_elements(data));
284
285 // We discover the array's extents along each of the dimensions, starting
286 // with the final dimension and working our way towards the first. At any
287 // given point during parsing, we know the extents starting at this
288 // dimension.
289 std::size_t know_extents_from{DIMENSIONS};
290
291 // Currently parsing this dimension. We start off at -1, relying on C++'s
292 // well-defined rollover for unsigned numbers.
293 // The actual outermost dimension of the array is 0, and the innermost is
294 // at the end. But, the array as a whole is enclosed in braces just like
295 // each row. So we act like there's an anomalous "outer" dimension holding
296 // the entire array.
297 constexpr std::size_t outer{std::size_t{0u} - std::size_t{1u}};
298
299 // We start parsing at the fictional outer dimension. The input begins
300 // with opening braces, one for each dimension, so we'll start off by
301 // bumping all the way to the innermost dimension.
302 std::size_t dim{outer};
303
304 // Extent counters, one per "real" dimension.
305 // Note initialiser syntax; this zero-initialises all elements.
306 std::array<std::size_t, DIMENSIONS> extents{};
307
308 // Current parsing position.
309 std::size_t here{0};
310 PQXX_ASSUME(here <= sz);
311 while (here < sz)
312 {
313 if (data[here] == '{')
314 {
315 if (dim == outer)
316 {
317 // This must be the initial opening brace.
318 if (know_extents_from != DIMENSIONS)
319 throw conversion_error{
320 "Array text representation closed and reopened its outside "
321 "brace pair."};
322 assert(here == 0);
323 PQXX_ASSUME(here == 0);
324 }
325 else
326 {
327 if (dim >= (DIMENSIONS - 1))
328 throw conversion_error{
329 "Array seems to have inconsistent number of dimensions."};
330 ++extents[dim];
331 }
332 // (Rolls over to zero if we're coming from the outer dimension.)
333 ++dim;
334 extents[dim] = 0u;
335 ++here;
336 }
337 else if (data[here] == '}')
338 {
339 if (dim == outer)
340 throw conversion_error{"Array has spurious '}'."};
341 if (dim < know_extents_from)
342 {
343 // We just finished parsing our first row in this dimension.
344 // Now we know the array dimension's extent.
345 m_extents[dim] = extents[dim];
346 know_extents_from = dim;
347 }
348 else
349 {
350 if (extents[dim] != m_extents[dim])
351 throw conversion_error{"Rows in array have inconsistent sizes."};
352 }
353 // Bump back down to the next-lower dimension. Which may be the outer
354 // dimension, through underflow.
355 --dim;
356 ++here;
357 here = parse_field_end(data, here);
358 }
359 else
360 {
361 // Found an array element. The actual elements always live in the
362 // "inner" dimension.
363 if (dim != DIMENSIONS - 1)
364 throw conversion_error{
365 "Malformed array: found element where sub-array was expected."};
366 assert(dim != outer);
367 ++extents[dim];
368 std::size_t end;
369 switch (data[here])
370 {
371 case '\0': throw conversion_error{"Unexpected zero byte in array."};
372 case ',': throw conversion_error{"Array contains empty field."};
373 case '"': {
374 // Double-quoted string. We parse it into a buffer before parsing
375 // the resulting string as an element. This seems wasteful: the
376 // string might not contain any special characters. So it's
377 // tempting to check, and try to use a string_view and avoid a
378 // useless copy step. But. Even besides the branch prediction
379 // risk, the very fact that the back-end chose to quote the string
380 // indicates that there is some kind of special character in there.
381 // So in practice, this optimisation would only apply if the only
382 // special characters in the string were commas.
383 end = pqxx::internal::scan_double_quoted_string<ENC>(
384 std::data(data), std::size(data), here);
385 // TODO: scan_double_quoted_string() with reusable buffer.
386 std::string const buf{
387 pqxx::internal::parse_double_quoted_string<ENC>(
388 std::data(data), end, here)};
389 m_elts.emplace_back(from_string<ELEMENT>(buf));
390 }
391 break;
392 default: {
393 // Unquoted string. An unquoted string is always literal, no
394 // escaping or encoding, so we don't need to parse it into a
395 // buffer. We can just read it as a string_view.
396 end = pqxx::internal::scan_unquoted_string<ENC, SEPARATOR, '}'>(
397 std::data(data), std::size(data), here);
398 std::string_view const field{
399 std::string_view{std::data(data) + here, end - here}};
400 if (field == "NULL")
401 {
402 if constexpr (nullness<ELEMENT>::has_null)
403 m_elts.emplace_back(nullness<ELEMENT>::null());
404 else
405 throw unexpected_null{pqxx::internal::concat(
406 "Array contains a null ", type_name<ELEMENT>,
407 ". Consider making it an array of std::optional<",
408 type_name<ELEMENT>, "> instead.")};
409 }
410 else
411 m_elts.emplace_back(from_string<ELEMENT>(field));
412 }
413 }
414 here = end;
415 PQXX_ASSUME(here <= sz);
416 here = parse_field_end(data, here);
417 }
418 }
419
420 if (dim != outer)
421 throw conversion_error{"Malformed array; may be truncated."};
422 assert(know_extents_from == 0);
423 PQXX_ASSUME(know_extents_from == 0);
424
425 init_factors();
426 }
427
429 void init_factors() noexcept
430 {
431 std::size_t factor{1};
432 for (std::size_t dim{DIMENSIONS - 1}; dim > 0; --dim)
433 {
434 factor *= m_extents[dim];
435 m_factors[dim - 1] = factor;
436 }
437 }
438
440 template<typename... INDEX> std::size_t locate(INDEX... index) const noexcept
441 {
442 static_assert(
443 sizeof...(index) == DIMENSIONS,
444 "Indexing array with wrong number of dimensions.");
445 return add_index(index...);
446 }
447
448 template<typename OUTER, typename... INDEX>
449 constexpr std::size_t add_index(OUTER outer, INDEX... indexes) const noexcept
450 {
451 std::size_t const first{check_cast<std::size_t>(outer, "array index"sv)};
452 if constexpr (sizeof...(indexes) == 0)
453 {
454 return first;
455 }
456 else
457 {
458 static_assert(sizeof...(indexes) < DIMENSIONS);
459 // (Offset by 1 here because the outer dimension is not in there.)
460 constexpr auto dimension{DIMENSIONS - (sizeof...(indexes) + 1)};
461 static_assert(dimension < DIMENSIONS);
462 return first * m_factors[dimension] + add_index(indexes...);
463 }
464 }
465
467
469 template<typename OUTER, typename... INDEX>
470 constexpr void check_bounds(OUTER outer, INDEX... indexes) const
471 {
472 std::size_t const first{check_cast<std::size_t>(outer, "array index"sv)};
473 static_assert(sizeof...(indexes) < DIMENSIONS);
474 // (Offset by 1 here because the outer dimension is not in there.)
475 constexpr auto dimension{DIMENSIONS - (sizeof...(indexes) + 1)};
476 static_assert(dimension < DIMENSIONS);
477 if (first >= m_extents[dimension])
478 throw range_error{pqxx::internal::concat(
479 "Array index for dimension ", dimension, " is out of bounds: ", first,
480 " >= ", m_extents[dimension])};
481
482 // Now check the rest of the indexes, if any.
483 if constexpr (sizeof...(indexes) > 0)
484 check_bounds(indexes...);
485 }
486
488 std::vector<ELEMENT> m_elts;
489
491 std::array<std::size_t, DIMENSIONS> m_extents;
492
494
501 std::array<std::size_t, DIMENSIONS - 1> m_factors;
502};
503
504
506
528class PQXX_LIBEXPORT array_parser
529{
530public:
532 enum class juncture
533 {
535 row_start,
537 row_end,
539 null_value,
541 string_value,
543 done,
544 };
545
547
551 explicit array_parser(
552 std::string_view input,
553 internal::encoding_group = internal::encoding_group::MONOBYTE);
554
556
562 std::pair<juncture, std::string> get_next() { return (this->*m_impl)(); }
563
564private:
565 std::string_view m_input;
566
568 std::size_t m_pos = 0u;
569
571
576 using implementation = std::pair<juncture, std::string> (array_parser::*)();
577
579 static implementation
580 specialize_for_encoding(pqxx::internal::encoding_group enc);
581
583 implementation m_impl;
584
586 template<pqxx::internal::encoding_group>
587 std::pair<juncture, std::string> parse_array_step();
588
589 template<pqxx::internal::encoding_group>
590 std::string::size_type scan_double_quoted_string() const;
591 template<pqxx::internal::encoding_group>
592 std::string parse_double_quoted_string(std::string::size_type end) const;
593 template<pqxx::internal::encoding_group>
594 std::string::size_type scan_unquoted_string() const;
595 template<pqxx::internal::encoding_group>
596 std::string parse_unquoted_string(std::string::size_type end) const;
597
598 template<pqxx::internal::encoding_group>
599 std::string::size_type scan_glyph(std::string::size_type pos) const;
600 template<pqxx::internal::encoding_group>
601 std::string::size_type
602 scan_glyph(std::string::size_type pos, std::string::size_type end) const;
603};
604} // namespace pqxx
605#endif
Low-level array parser.
Definition: array.hxx:529
juncture
What's the latest thing found in the array?
Definition: array.hxx:533
std::pair< juncture, std::string > get_next()
Parse the next step in the array.
Definition: array.hxx:562
An SQL array received from the database.
Definition: array.hxx:56
constexpr auto back() const noexcept
Refer to the last element, if any.
Definition: array.hxx:158
constexpr auto cend() const noexcept
Return end point of iteration.
Definition: array.hxx:117
constexpr auto crbegin() const noexcept
Begin reverse iteration.
Definition: array.hxx:119
ELEMENT const & operator[](INDEX... index) const
Access element (without bounds check).
Definition: array.hxx:103
constexpr std::size_t size() const noexcept
Number of elements in the array.
Definition: array.hxx:127
constexpr auto ssize() const noexcept
Number of elements in the array (as a signed number).
Definition: array.hxx:145
constexpr std::size_t dimensions() noexcept
How many dimensions does this array have?
Definition: array.hxx:75
constexpr auto cbegin() const noexcept
Begin iteration of individual elements.
Definition: array.hxx:115
array(std::string_view data, connection const &conn)
Parse an SQL array, read as text from a pqxx::result or stream.
Definition: array.hxx:68
constexpr auto crend() const noexcept
Return end point of reverse iteration.
Definition: array.hxx:121
std::array< std::size_t, DIMENSIONS > const & sizes() noexcept
Return the sizes of this array in each of its dimensions.
Definition: array.hxx:82
constexpr auto front() const noexcept
Refer to the first element, if any.
Definition: array.hxx:153
Connection to a database.
Definition: connection.hxx:230
std::string concat(TYPE... item)
Efficiently combine a bunch of items into one big string.
Definition: concat.hxx:31
The home of all libpqxx classes, functions, templates, etc.
Definition: array.cxx:27
static TYPE null()
Return a null value.
static bool has_null
Does this type have a null value?
Definition: strconv.hxx:93