-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcppstrings.h
More file actions
2493 lines (2120 loc) · 102 KB
/
cppstrings.h
File metadata and controls
2493 lines (2120 loc) · 102 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#pragma once
/**
Library cppstrings
"What if c++ strings were as easy to use as Python strings?"
Copyright (C) 2023-2026 Philippe Schmouker
contact - ph (dot) schmouker (at) gmail (dot) com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
//=============================================================
#include <algorithm>
#include <array>
#include <cassert>
#include <cctype>
#include <cwctype>
#include <format>
#include <limits>
#include <map>
#include <ranges>
#include <span>
#include <stdexcept>
#include <type_traits>
#include <vector>
namespace pcs // i.e. "pythonic c++ strings"
{
//=============================================================
// Forward declarations
// base class -- not to be instantiated directly; use the specializations declared below instead
template<
class CharT,
class TraitsT = std::char_traits<CharT>,
class AllocatorT = std::allocator<CharT>
> class CppStringT;
// specializations of the base class -- these are the ones that should be instantiated by user.
using CppString = CppStringT<char>; //!< Specialization of basic class with template argument 'char'
using CppWString = CppStringT<wchar_t>; //!< Specialization of basic class with template argument 'wchar_t'
#if defined(_MSC_VER)
# pragma warning(push)
# pragma warning(disable: 4455) // silences MSVC warning C4455, raised on the literal operators definitions
#endif
// literal operators -- declared here, their definitions are not part of this section of the file
inline CppString operator""_cs(const char* str, std::size_t len); //!< Forms a CppString literal.
inline CppWString operator""_cs(const wchar_t* str, std::size_t len); //!< Forms a CppWString literal.
// slices -- to be used with operator CppStringT::operator().
template<typename IntT = std::int64_t>
requires std::is_signed_v<IntT>
class Slice; //!< Base class for slices, with start, stop and step specified values
template<typename IntT = std::int64_t>
requires std::is_signed_v<IntT>
struct StartSlice; //!< struct of slices with default stop and step values
template<typename IntT = std::int64_t>
requires std::is_signed_v<IntT>
struct StopSlice; //!< struct of slices with default start and step values
template<typename IntT = std::int64_t>
requires std::is_signed_v<IntT>
struct StepSlice; //!< struct of slices with default start and stop values
template<typename IntT = std::int64_t>
requires std::is_signed_v<IntT>
struct StartStopSlice; //!< struct of slices with default step values
template<typename IntT = std::int64_t>
requires std::is_signed_v<IntT>
struct StartStepSlice; //!< struct of slices with default stop values
template<typename IntT = std::int64_t>
requires std::is_signed_v<IntT>
struct StopStepSlice; //!< struct of slices with default start values
// chars classifications -- not to be directly called, see respective specializations at the very end of this module.
template<class CharT>
inline const bool is_alpha(const CharT ch) noexcept; //!< Returns true if character ch is alphabetic, or false otherwise.
template<class CharT>
inline const bool is_ascii(const CharT ch) noexcept; //!< Returns true if character ch gets ASCII code, or false otherwise.
template<class CharT>
inline const bool is_decimal(const CharT ch) noexcept; //!< Returns true if character is a decimal digit, or false otherwise.
template<class CharT>
inline const bool is_digit(const CharT ch) noexcept; //!< Returns true if character is a decimal digit, or false otherwise.
template<class CharT>
inline const bool is_id_continue(const CharT ch) noexcept; //!< Returns true if character is a continuing char for identifiers, or false otherwise.
template<class CharT>
inline const bool is_id_start(const CharT ch) noexcept; //!< Returns true if character is a starting char for identifiers, or false otherwise.
template<class CharT>
inline const bool is_lower(const CharT ch) noexcept; //!< Returns true if character is lowercase, or false otherwise.
template<class CharT>
inline const bool is_numeric(const CharT ch) noexcept; //!< Returns true if character is a decimal digit, or false otherwise.
template<class CharT>
inline const bool is_printable(const CharT ch) noexcept; //!< Returns true if character ch is printable, or false otherwise.
template<class CharT>
inline const bool is_punctuation(const CharT ch) noexcept; //!< Returns true if character ch is punctuation, or false otherwise.
template<class CharT>
inline const bool is_space(const CharT ch) noexcept; //!< Returns true if character ch is white space, or false otherwise.
template<class CharT>
inline const bool is_upper(const CharT ch) noexcept; //!< Returns true if character is uppercase, or false otherwise.
template<class CharT>
inline const CharT swap_case(const CharT ch) noexcept; //!< Returns the swapped case form of character ch if it exists, or ch itself otherwise.
template<class CharT>
inline const CharT to_lower(const CharT ch) noexcept; //!< Returns the lowercase form of character ch if it exists, or ch itself otherwise.
template<class CharT>
inline const CharT to_upper(const CharT ch) noexcept; //!< Returns the uppercase form of character ch if it exists, or ch itself otherwise.
//===== CppStringT<> ======================================
/** \brief This is the templated base class for all CppString classes.
*
* Users should instantiate any specialization of this base class
* rather than this base class itself:
* - \see CppString for CppStringT<char>.
* - \see CppWString for CppStringT<wchar_t>.
*
* This base class inherits from std::basic_string<CharT>. As such,
* it gets direct access to all public methods of its base class.
* \see https://en.cppreference.com/w/cpp/string/basic_string for a
* full list of such methods, for instance.
*
* You may specialize it by your own with any of the next char types:
* - char8_t (C++20)
* - char16_t (C++11)
* - char32_t (C++11)
* Caution: templated method format() may be difficult to specialize
* with these types --> let us know if you succeed!
*/
template<class CharT, class TraitsT, class AllocatorT>
class CppStringT : public std::basic_string<CharT, TraitsT, AllocatorT>
{
public:
//=== Wrappers ========================================
// MyBaseClass names the std::basic_string instantiation this class derives from.
// The other aliases re-export the nested types of that base class under their
// standard names, so that they can be used unqualified within this class.
using MyBaseClass = std::basic_string<CharT, TraitsT, AllocatorT>;
using traits_type = MyBaseClass::traits_type;
using value_type = MyBaseClass::value_type;
using allocator_type = MyBaseClass::allocator_type;
using size_type = MyBaseClass::size_type;
using difference_type = MyBaseClass::difference_type;
using reference = MyBaseClass::reference;
using const_reference = MyBaseClass::const_reference;
using pointer = MyBaseClass::pointer;
using const_pointer = MyBaseClass::const_pointer;
using iterator = MyBaseClass::iterator;
using const_iterator = MyBaseClass::const_iterator;
using reverse_iterator = MyBaseClass::reverse_iterator;
using const_reverse_iterator = MyBaseClass::const_reverse_iterator;
//=== Translation Table ===============================
/** \brief The internal class of translation tables, used with methods CppStringT::maketrans and CppStringT::translate.
*
* A translation table associates single characters (the keys) with
* replacement strings (the values). Characters declared as "not
* translated" are associated with the empty string. Looking up a
* character that is not a key returns this character itself, as a
* one-character string.
*
* All constructors that pair keys with values stop pairing at the
* end of the shortest sequence, so that no read ever happens past
* the end of the values.
*/
class TransTable
{
public:
    //---   wrappers   ------------------------------------
    using key_type = CharT;
    using value_type = CppStringT;

    //---   Constructors / destructor   -------------------
    /** \brief Creates a TransTable from a standard map (#1).
    *
    * The map is taken by const reference and copied once into the table.
    */
    inline TransTable(const std::map<key_type, value_type>& trans_table)
        : m_table{ trans_table }
    {}

    /** \brief Creates a TransTable from two strings (#2).
    *
    * Parameters keys and values must have the same size (asserted
    * in debug builds). The i-th character in keys is associated in
    * the translation table with the i-th character in values.
    */
    TransTable(const CppStringT& keys, const CppStringT& values)
    {
        assert(keys.size() == values.size());
        m_add_pairs(keys.cbegin(), keys.cend(), values.cbegin(), values.cend());
    }

    /** \brief Creates a TransTable from three strings (#3).
    *
    * Parameters keys and values must have the same size (asserted
    * in debug builds). The i-th character in keys is associated in
    * the translation table with the i-th character in values.
    * Finally, the characters contained in string not_translated are
    * associated in the translation table with the empty string.
    */
    TransTable(const CppStringT& keys, const CppStringT& values, const CppStringT& not_translated)
    {
        assert(keys.size() == values.size());
        m_add_pairs(keys.cbegin(), keys.cend(), values.cbegin(), values.cend());
        m_add_untranslated(not_translated.cbegin(), not_translated.cend());
    }

    /** \brief Creates a TransTable from a string and an initialization list (#4).
    *
    * Parameters keys and values must have the same size (asserted
    * in debug builds). The i-th character in keys is associated in
    * the translation table with the i-th string in values.
    */
    inline TransTable(const CppStringT& keys, const std::initializer_list<CppStringT>& values)
    {
        assert(keys.size() == values.size());
        m_add_pairs(keys.cbegin(), keys.cend(), values.begin(), values.end());
    }

    /** \brief Creates a TransTable from a string, an initialization list and a string (#5).
    *
    * Parameters keys and values must have the same size (asserted
    * in debug builds). The i-th character in keys is associated in
    * the translation table with the i-th string in values. Finally,
    * the characters contained in string not_translated are
    * associated in the translation table with the empty string.
    */
    inline TransTable(const CppStringT& keys, const std::initializer_list<CppStringT> values, const CppStringT& not_translated)
    {
        assert(keys.size() == values.size());
        m_add_pairs(keys.cbegin(), keys.cend(), values.begin(), values.end());
        m_add_untranslated(not_translated.cbegin(), not_translated.cend());
    }

    /** \brief Creates a TransTable from two pointers to null-terminated lists of characters (#6).
    *
    * The i-th character in keys is associated in the translation
    * table with the i-th character in values. Pairing stops at the
    * first null character met in either list. Pointers must not be
    * null: they are dereferenced without any check.
    */
    inline TransTable(const CharT* keys, const CharT* values)
    {
        while (*keys && *values)
            m_table[*keys++] = value_type(*values++);
    }

    /** \brief Creates a TransTable from three pointers to null-terminated lists of characters (#7).
    *
    * The i-th character in keys is associated in the translation
    * table with the i-th character in values. Pairing stops at the
    * first null character met in either list. Finally, the
    * characters contained in not_translated are associated in the
    * translation table with the empty string. Pointers must not be
    * null: they are dereferenced without any check.
    */
    inline TransTable(const CharT* keys, const CharT* values, const CharT* not_translated)
    {
        while (*keys && *values)
            m_table[*keys++] = value_type(*values++);
        while (*not_translated)
            m_table[*not_translated++] = CppStringT();
    }

    /** \brief Creates a TransTable from two containers iterators (#8).
    *
    * Both containers should have the same size. The i-th key is
    * associated in the translation table with the i-th entry in
    * values. Pairing stops at the end of the shortest range.
    */
    template<class KeyIt, class ValueIt>
    inline TransTable(KeyIt first_key, KeyIt last_key, ValueIt first_value, ValueIt last_value)
    {
        m_add_pairs(first_key, last_key, first_value, last_value);
    }

    /** \brief Creates a TransTable from three containers iterators (#9).
    *
    * Both keys and values containers should have the same size.
    * The i-th key is associated in the translation table with the
    * i-th entry in values. Pairing stops at the end of the shortest
    * range. Finally, the characters in range [first_not_translated,
    * last_not_translated) are associated in the translation table
    * with the empty string.
    */
    template<class Key1It, class ValueIt, class Key2It>
    inline TransTable(Key1It first_key, Key1It last_key,
                      ValueIt first_value, ValueIt last_value,
                      Key2It first_not_translated, Key2It last_not_translated)
    {
        m_add_pairs(first_key, last_key, first_value, last_value);
        m_add_untranslated(first_not_translated, last_not_translated);
    }

    /** \brief Creates a TransTable from two string views (#10).
    *
    * Parameters keys and values must have the same size (asserted
    * in debug builds). The i-th character in keys is associated in
    * the translation table with the i-th character in values.
    *
    * Each element of keys is a character and is used directly as
    * the key (it formerly was dereferenced and indexed, which could
    * not compile for characters).
    */
    template<class StringViewLike>
    explicit TransTable(const StringViewLike& keys, const StringViewLike& values)
    {
        assert(keys.size() == values.size());
        m_add_pairs(keys.begin(), keys.end(), values.begin(), values.end());
    }

    inline TransTable() noexcept = default;                     //!< Default empty constructor.
    // NOTE(review): the copy operations are declared noexcept although copying the
    // internal std::map allocates; an allocation failure would then terminate the program.
    inline TransTable(const TransTable&) noexcept = default;    //!< Default copy constructor.
    inline TransTable(TransTable&&) noexcept = default;         //!< Default move constructor.

    inline ~TransTable() noexcept = default;                    //!< Default destructor

    //---   operators   -----------------------------------
    inline TransTable& operator= (const TransTable&) noexcept = default;    //!< Default copy assignment
    inline TransTable& operator= (TransTable&&) noexcept = default;         //!< Default move assignment

    /** \brief Assignment operator with a standard map: replaces the whole content of the table. */
    inline TransTable& operator= (const std::map<key_type, value_type>& trans_table) noexcept
    {
        m_table = trans_table;
        return *this;
    }

    /** \brief Indexing operator.
    *
    * Returns a copy of the string associated with character ch, or
    * a one-character string made of ch itself when ch is not a key
    * of the table. The table is never modified by this lookup, so
    * the operator is usable on const tables.
    */
    [[nodiscard]]
    inline value_type operator[] (const key_type ch) const noexcept
    {
        const auto it = m_table.find(ch);
        if (it != m_table.end()) [[likely]] {
            return it->second;
        }
        else [[unlikely]] {
            return ch;  // implicitly converted to a one-character string
        }
    }

    /** \brief Gives a direct, mutable access to the internal map -- for tests purposes. */
    inline std::map<key_type, value_type>& get_table() noexcept
    {
        return m_table;
    }

private:
    /** Associates keys with values pairwise, stopping at the end of the shortest range. */
    template<class KeyIt, class ValueIt>
    inline void m_add_pairs(KeyIt key_it, const KeyIt keys_end, ValueIt val_it, const ValueIt values_end)
    {
        while (key_it != keys_end && val_it != values_end) {
            m_table[*key_it] = value_type(*val_it);
            ++key_it;
            ++val_it;
        }
    }

    /** Associates every key in range [key_it, keys_end) with the empty string. */
    template<class KeyIt>
    inline void m_add_untranslated(KeyIt key_it, const KeyIt keys_end)
    {
        for (; key_it != keys_end; ++key_it)
            m_table[*key_it] = value_type();
    }

    // 'typename' is not used here: key_type and value_type are unqualified names,
    // and 'typename' followed by an unqualified name is only accepted by MSVC.
    std::map<key_type, value_type> m_table{};  // the internal storage of the translation table. Access it via the indexing operator.
};
//=== Constructors / Destructor =======================
// NOTE: a CppStringT is itself convertible to std::basic_string_view. Passing a
// CppStringT expression straight to a MyBaseClass constructor therefore selects
// the "string-view like" template overload of the base class (exact match)
// rather than its copy or move constructor (derived-to-base conversion), and
// the characters get copied. The move constructors below cast their argument
// to MyBaseClass&& explicitly, so that the content is really moved.

inline CppStringT() : MyBaseClass() {}                                                                  // #1  empty string
inline CppStringT(const CppStringT& other) : MyBaseClass(other) {}                                      // #2  copy
inline CppStringT(const CppStringT& other, const AllocatorT& alloc) : MyBaseClass(other, alloc) {}      // #3  copy, with allocator
inline CppStringT(CppStringT&& other) noexcept                                                          // #4  move
    : MyBaseClass(static_cast<MyBaseClass&&>(other))
{}
inline CppStringT(CppStringT&& other, const AllocatorT& alloc) noexcept                                 // #5  move, with allocator
    : MyBaseClass(static_cast<MyBaseClass&&>(other), alloc)
{}
inline CppStringT(MyBaseClass::size_type count, CharT ch) : MyBaseClass(count, ch) {}                   // #6  count times character ch
inline CppStringT(const CppStringT& other, size_type pos) : MyBaseClass(other, pos) {}                  // #7  tail of other, from pos
// #8 is not noexcept: as for #7, the base class throws std::out_of_range when
// pos is greater than other.size(), and callers must be able to catch it.
inline CppStringT(const CppStringT& other, size_type pos, size_type count) : MyBaseClass(other, pos, count) {}  // #8  at most count characters of other, from pos

/** Creates a string from a null-terminated list of characters; a null pointer gives the empty string (#9). */
inline CppStringT(const CharT* s)                                                                       // #9
    : MyBaseClass(s ? MyBaseClass(s) : MyBaseClass())
{}

/** Creates a string from the count first characters pointed to by s; a null pointer gives the empty string, whatever count is (#10). */
inline CppStringT(const CharT* s, size_type count)                                                      // #10
    : MyBaseClass(s ? MyBaseClass(s, count) : MyBaseClass())
{}

inline CppStringT(std::initializer_list<CharT> ilist) : MyBaseClass(ilist) {}                           // #11 list of characters

inline CppStringT(const CharT ch) : MyBaseClass(1, ch) {}                                               // #19 single character (implicit conversion allowed)

inline CppStringT(const MyBaseClass& other) : MyBaseClass(other) {}                                     // #12 copy of a standard string
inline CppStringT(const MyBaseClass& other, const AllocatorT& alloc) : MyBaseClass(other, alloc) {}     // #13 copy of a standard string, with allocator
inline CppStringT(MyBaseClass&& other) : MyBaseClass(static_cast<MyBaseClass&&>(other)) {}              // #14 move of a standard string
inline CppStringT(MyBaseClass&& other, const AllocatorT& alloc)                                         // #15 move of a standard string, with allocator
    : MyBaseClass(static_cast<MyBaseClass&&>(other), alloc)
{}

template<class InputIt>
inline CppStringT(InputIt first, InputIt last) : MyBaseClass(first, last) {}                            // #16 range of characters

// NOTE(review): #17 and #18 take their argument by non-const lvalue reference,
// so temporaries cannot be passed to them -- confirm this is intended.
template<class StringViewLike>
explicit CppStringT(StringViewLike& svl) : MyBaseClass(svl) {}                                          // #17 string-view like object

template<class StringViewLike>
CppStringT(StringViewLike& svl, size_type pos, size_type n) : MyBaseClass(svl, pos, n) {}               // #18 part of a string-view like object

inline ~CppStringT() noexcept = default;
//=== Assignment operators ============================
// Both assignments are compiler-generated.
// NOTE(review): the copy assignment is declared noexcept although copying
// the characters may allocate -- an allocation failure would then terminate.
CppStringT& operator= (const CppStringT&) noexcept = default; //!< Default copy assignment
CppStringT& operator= (CppStringT&&) noexcept = default; //!< Default move assignment
//=== Exceptions ======================================
/** \brief The exception thrown when a searched substring cannot be found, see index(). */
class NotFoundException : public std::logic_error
{
public:
    using MyBaseClass = std::logic_error;

    /** \brief Builds the exception with an explanatory message given as a standard string. */
    inline NotFoundException(const std::string& what_arg)
        : MyBaseClass{ what_arg }
    {}

    /** \brief Builds the exception with an explanatory message given as a null-terminated string. */
    inline NotFoundException(const char* what_arg)
        : MyBaseClass{ what_arg }
    {}
};
//=== Methods =========================================
//--- capitalize() ------------------------------------
/** \brief Returns a copy of the string with its first character capitalized and the rest lowercased.
*
* The original string remains unchanged: the method works on a
* copy and is therefore callable on const strings also.
*/
inline CppStringT capitalize() const noexcept
{
    CppStringT res(*this);
    if (!res.empty()) [[likely]] {
        res.lower();
        res[0] = pcs::to_upper(res[0]);
    }
    return res;
}
//--- center() ----------------------------------------
/** \brief Returns a copy of the string centered in a string of length width.
*
* The string is padded on both sides with character fillch (an
* ASCII space by default). When the padding cannot be evenly
* shared, the extra fill character goes to the right side. When
* width is not greater than the length of the string, a plain
* copy is returned. The original string remains unchanged.
*/
[[nodiscard]]
CppStringT center(const size_type width, const value_type fillch = value_type(' ')) const noexcept
{
    const size_type my_size{ this->size() };

    if (my_size >= width) [[unlikely]]
        return CppStringT(*this);

    const size_type left_count{ (width - my_size) / 2 };
    const size_type right_count{ width - left_count - my_size };

    const CppStringT left_pad(left_count, fillch);
    const CppStringT right_pad(right_count, fillch);

    return left_pad + *this + right_pad;
}
//--- contains() --------------------------------------
/** \brief Returns true if this string contains the passed string, or false otherwise.
*
* This is a c++ implementation of Python keyword 'in' applied to strings.
*/
[[nodiscard]]
constexpr bool contains(const CppStringT& substr) const noexcept
{
if (substr.empty()) [[unlikely]]
// the empty string is always contained in any string
return true;
#if (defined(_HAS_CXX23) && _HAS_CXX23) || (!defined(_HAS_CXX23) && __cplusplus >= 202302L)
// c++23 and above already defines this method
return MyBaseClass::contains(substr);
#else
// up to c++20, we have to implement this method
const size_type substr_width{ substr.size() };
const size_type width{ this->size() };
if (substr_width > width) [[unlikely]]
return false;
for (size_type index = 0; index <= width - substr_width; ++index) [[likely]] {
if (substr == this->substr(index, substr_width))
return true;
}
return false;
#endif
}
//--- contains_n() ------------------------------------
/** \brief Returns true if the passed string is found within the slice str[start:start+count-1], or false otherwise.
*
* This is a c++ implementation of Python keyword 'in' applied to Python sliced strings.
* Any exception raised while extracting or searching the slice is
* caught, and false is returned in this case.
*/
[[nodiscard]]
inline constexpr bool contains_n(const CppStringT& sub, const size_type start, const size_type count = -1) const noexcept
{
    try {
        const auto window = this->substr(start, count);
        return window.contains(sub);
    }
    catch (...) {
        return false;
    }
}
//--- count() -----------------------------------------
/** \brief Returns the number of non-overlapping occurrences of substring sub in the range [start, end].
*
* Position end is inclusive and is clamped to the last position in
* the string. Occurrences are searched from left to right and must
* entirely fit in the range to be counted.
*
* - Returns 0 when start is greater than the string size, or when
*   the range is reversed (i.e. end + 1 < start).
* - As with Python, an empty sub is counted once per position in
*   the range plus one (i.e. range length + 1). It formerly made
*   this method loop forever.
*
* The search is done in place with the find() method of the base
* class: no temporary string gets allocated.
*/
[[nodiscard]]
constexpr size_type count(const CppStringT& sub, const size_type start = 0, const size_type end = -1) const noexcept
{
    const size_type len{ this->size() };
    if (start > len) [[unlikely]]
        return 0;

    // index of the first position that is NOT searched ('end' is inclusive)
    const size_type stop{ (end >= len) ? len : end + 1 };
    if (stop < start) [[unlikely]]
        return 0;

    const size_type sub_len{ sub.size() };
    if (sub_len == 0) [[unlikely]]
        return stop - start + 1;

    size_type n{ 0 };
    size_type pos{ start };
    // invariant: pos <= stop, so 'stop - pos' never wraps around
    while (stop - pos >= sub_len) {
        pos = MyBaseClass::find(sub, pos);
        if (pos == MyBaseClass::npos || pos > stop - sub_len)
            break;  // no more occurrence, or next one overruns the range
        ++n;
        pos += sub_len;  // occurrences must not overlap
    }

    return n;
}
//--- count_n() ---------------------------------------
/** \brief Returns the number of non-overlapping occurrences of substring sub in the range [start, start+length-1].
*
* A null length designates an empty range: 0 is returned then,
* or 1 when sub is empty also (as Python does). A length that
* overruns the end of the string is clamped.
*/
[[nodiscard]]
inline constexpr size_type count_n(const CppStringT& sub, const size_type start, const size_type length) const noexcept
{
    // a null length must not be turned into 'start - 1', which wraps
    // to npos (i.e. "up to the end of the string") when start is 0
    if (length == 0) [[unlikely]]
        return sub.empty() ? 1 : 0;

    // saturates rather than wrapping around when start + length - 1 overflows
    const size_type span{ length - 1 };
    const size_type last{ (span > CppStringT::npos - start) ? CppStringT::npos : start + span };

    return count(sub, start, last);
}
/** \brief Returns the number of non-overlapping occurrences of substring sub in the range [0, length-1].
*
* A null length designates an empty range: 0 is returned then,
* or 1 when sub is empty also (as Python does).
*/
[[nodiscard]]
inline constexpr size_type count_n(const CppStringT& sub, const size_type length) const noexcept
{
    // a null length must not be turned into 'length - 1', which
    // wraps to npos (i.e. "up to the end of the string")
    if (length == 0) [[unlikely]]
        return sub.empty() ? 1 : 0;

    return count(sub, 0, length - 1);
}
//--- endswith() --------------------------------------
/** \brief Returns true if the string ends with the specified suffix, otherwise returns false. Test begins at start position and stops at end position. */
[[nodiscard]]
inline const bool endswith(const CppStringT& suffix, const size_type start, const size_type end) const noexcept
{
return this->substr(start, end - start + 1).MyBaseClass::ends_with(suffix);
}
/** \brief Returns true if the string ends with the specified suffix, otherwise returns false. Test begins at start of string and stops at end position.
*
* The test runs on the end first characters of the string.
*
* NOTE(review): position end is exclusive here, while it is
* inclusive in endswith(suffix, start, end) -- confirm which one
* is intended.
*/
[[nodiscard]]
inline const bool endswith(const CppStringT& suffix, const size_type end) const noexcept
{
    const auto head = this->substr(0, end);
    return head.MyBaseClass::ends_with(suffix);
}
/** \brief Returns true if the string ends with the specified suffix, otherwise returns false. Test runs on the whole string.
*
* This is a plain call to method ends_with() of the base class.
*/
[[nodiscard]]
inline const bool endswith(const CppStringT& suffix) const noexcept
{
    const bool found{ MyBaseClass::ends_with(suffix) };
    return found;
}
/** \brief Returns true if the string ends with any of the specified suffixes, otherwise returns false. Test begins at start position and stops at end position. */
[[nodiscard]]
const bool endswith(const std::initializer_list<CppStringT>& suffixes, const size_type start, const size_type end) const noexcept
{
if (start > end) [[unlikely]]
return false;
CppStringT tmp(this->substr(start, end - start + 1));
for (auto& suffix : suffixes) {
if (tmp.ends_with(suffix)) [[unlikely]]
return true;
}
return false;
}
//--- endswith_n() ------------------------------------
/** \brief Returns true if the string ends with the specified suffix, otherwise returns false. Test begins at start position and stops after count positions. */
[[nodiscard]]
inline const bool endswith_n(const CppStringT& suffix, const size_type start, const size_type count) const noexcept
{
return endswith(suffix, start, start + count - 1);
}
/** \brief Returns true if the string ends with the specified suffix, otherwise returns false. Test begins at position 0 and stops after count positions. */
[[nodiscard]]
inline const bool endswith_n(const CppStringT& suffix, const size_type count) const noexcept
{
return endswith(suffix, 0, count - 1);
}
/** \brief Returns true if the string ends with any of the specified suffixes, otherwise returns false. Test begins at start position and stops after count positions. */
[[nodiscard]]
inline const bool endswith_n(const std::initializer_list<CppStringT>& suffixes, const size_type start, const size_type count) const noexcept
{
return endswith(suffixes, start, start + count - 1);
}
//--- expand_tabs() -----------------------------------
/** \brief Returns a copy of the string where all tab characters are replaced by one or more spaces, depending on the current column and the given tab size.
*
* Every tab character is replaced with as many spaces as needed
* to reach the next column that is a multiple of tabsize (at
* least one space). The column is reset to 0 after any '\n' or
* '\r' character, which are copied as is. A null tabsize is
* processed as if it was 1.
*/
[[nodiscard]]
CppStringT expand_tabs(const size_type tabsize = 8) const noexcept
{
    const size_type tab_width{ tabsize == 0 ? 1 : tabsize };
    CppStringT expanded{};
    std::size_t column{ 0 };

    for (const value_type ch : *this) {
        if (ch == value_type('\n') || ch == value_type('\r')) [[unlikely]] {
            // end of line: the column counter restarts
            expanded += ch;
            column = 0;
            continue;
        }

        if (ch != value_type('\t')) [[likely]] {
            expanded += ch;
            ++column;
            continue;
        }

        // tab: pads up to the next tab stop, with one space at least
        const auto padding = tab_width - column % tab_width;
        expanded.MyBaseClass::append(padding, value_type(' '));
        column += padding;
    }

    return expanded;
}
//--- find() ------------------------------------------
/** \brief Returns the lowest index in the string where substring sub is found within the slice str[start:end], or -1 (i.e. 'npos') if sub is not found.
*
* Position end is inclusive; its default value (-1, i.e. 'npos')
* stands for the size of the string. 'npos' is returned when
* start is greater than end. Otherwise the search is delegated
* to find_n() with count end - start + 1.
*
* Note: this method should be used only if you need to know the position of
* sub. To check if sub is a substring or not, use the method contains().
*
* \see find_n(), rfind() and rfind_n().
* \see index(), index_n(), rindex() and rindex_n().
*/
[[nodiscard]]
constexpr size_type find(const CppStringT& sub, const size_type start = 0, const size_type end = -1) const noexcept
{
    const size_type last{ (end == CppStringT::npos) ? this->size() : end };

    return (start > last)
        ? CppStringT::npos
        : find_n(sub, start, last - start + 1);
}
//--- find_n() ----------------------------------------
/** \brief Returns the lowest index in the string where substring sub is found within the slice str[start:start+count-1], or -1 (i.e. 'npos') if sub is not found.
*
* The slice is first extracted with substr(), then searched with
* the find() method of the base class. The returned position is
* relative to the whole string, i.e. start is added to the
* position found in the slice. Any exception raised meanwhile is
* caught and 'npos' is returned then.
*
* Note: this method should be used only if you need to know the position of
* sub. To check if sub is a substring or not, use the method contains_n().
*
* CAUTION: an empty sub is found at the very beginning of the
* slice, provided this slice can be extracted.
*
* \see find(), rfind() and rfind_n().
* \see index(), index_n(), rindex() and rindex_n().
*/
[[nodiscard]]
inline constexpr size_type find_n(const CppStringT& sub, const size_type start, const size_type count) const noexcept
{
    try {
        const CppStringT window{ this->substr(start, count) };
        const size_type relative_pos{ window.MyBaseClass::find(sub) };

        if (relative_pos != CppStringT::npos)
            return relative_pos + start;
    }
    catch (...) {
        // any failure is reported as "not found"
    }

    return CppStringT::npos;
}
/** \brief Returns the lowest index in the string where substring sub is found within the slice str[0:count-1], or -1 (i.e. 'npos') if sub is not found.
*
* This is find_n(sub, start, count) with start position 0.
*
* Note: this method should be used only if you need to know the position of
* sub. To check if sub is a substring or not, use the method contains_n().
*
* \see find(), rfind() and rfind_n().
* \see index(), index_n(), rindex() and rindex_n().
*/
[[nodiscard]]
inline constexpr size_type find_n(const CppStringT& sub, const size_type count) const noexcept
{
    const size_type from_beginning{ 0 };
    return find_n(sub, from_beginning, count);
}
//--- format() ----------------------------------------
/** \brief Formats this string according to c++20 std::format() specification. Returns this string.
*
* This is the generic version, selected for character types
* other than char and wchar_t: it does nothing and returns this
* string unchanged.
*/
template<typename T, class... ArgsT>
inline CppStringT& format(
    const std::basic_format_string<T, std::type_identity_t<ArgsT>...> frmt,
    ArgsT&&... args
)
{
    // neither frmt nor args are used here
    return *this;
}

/** \brief Version for char's: replaces the content of this string with the formatted text, and returns this string. */
template<class... ArgsT>
inline CppStringT& format(
    const std::basic_format_string<char, std::type_identity_t<ArgsT>...> frmt,
    ArgsT&&... args
)
{
    *this = std::vformat(frmt.get(), std::make_format_args(args...));
    return *this;
}

/** \brief Version for wchar_t's: replaces the content of this string with the formatted text, and returns this string. */
template<class... ArgsT>
inline CppStringT& format(
    const std::basic_format_string<wchar_t, std::type_identity_t<ArgsT>...> frmt,
    ArgsT&&... args
)
{
    *this = std::vformat(frmt.get(), std::make_wformat_args(args...));
    return *this;
}
//--- index() -----------------------------------------
/** \brief Like find(const CppStringT&), but raises NotFoundException when the substring sub is not found.
*
* Returns the position returned by find(sub, start, end) when
* sub is found.
*
* \see index_n(), rindex() and rindex_n().
* \see find(), find_n(), rfind() and rfind_n().
*/
[[nodiscard]]
inline constexpr size_type index(const CppStringT& sub, const size_type start = 0, const size_type end = -1) const
{
    const size_type found_pos{ find(sub, start, end) };

    if (found_pos != CppStringT::npos)
        return found_pos;

    throw NotFoundException("substring not found in string.");
}
//--- index_n() ---------------------------------------
/** \brief Like find_n(sub, start, count), but raises NotFoundException when the substring is not found.
*
* The meaning of start and count is the one of find_n(sub, start, count).
*
* The search is delegated to find_n() rather than to
* index(sub, start, start + count - 1): the inclusive end position
* start + count - 1 wraps around to -1 when start and count are both
* 0 (-1 being the default 'end' value of index()), and may overflow
* for big values of count.
*
* \return the position returned by find_n(sub, start, count).
* \throws NotFoundException when find_n() returns 'npos'.
*
* \see index(), rindex() and rindex_n().
* \see find(), find_n(), rfind() and rfind_n().
*/
[[nodiscard]]
inline constexpr size_type index_n(const CppStringT& sub, const size_type start, const size_type count) const
{
    const size_type ret_value = find_n(sub, start, count);
    if (ret_value == CppStringT::npos)
        throw NotFoundException("substring not found in string.");
    return ret_value;
}
/** \brief Like find_n(sub, count), but raises NotFoundException when the substring is not found.
*
* count is a number of characters, not an inclusive end position:
* forwarding it as the 'end' argument of index() would search one
* character further than find_n(sub, count) and than
* index_n(sub, 0, count) do. The search is therefore delegated to
* find_n(sub, count).
*
* \return the position returned by find_n(sub, count).
* \throws NotFoundException when find_n() returns 'npos'.
*
* \see index(), rindex() and rindex_n().
* \see find(), find_n(), rfind() and rfind_n().
*/
[[nodiscard]]
inline constexpr size_type index_n(const CppStringT& sub, const size_type count) const
{
    const size_type ret_value = find_n(sub, count);
    if (ret_value == CppStringT::npos)
        throw NotFoundException("substring not found in string.");
    return ret_value;
}
//--- isalnum() ---------------------------------------
/** \brief Returns true if the string is not empty and all of its characters are alphanumeric, or false otherwise.
*
* A character is accepted as soon as one of pcs::is_alpha(),
* pcs::is_decimal(), pcs::is_digit() or pcs::is_numeric() accepts it.
*/
[[nodiscard]]
inline const bool isalnum() const noexcept
{
    const auto is_alnum_char = [](const value_type ch) {
        return pcs::is_alpha(ch) || pcs::is_decimal(ch) || pcs::is_digit(ch) || pcs::is_numeric(ch);
    };

    return !this->empty() && std::all_of(this->cbegin(), this->cend(), is_alnum_char);
}
//--- isalpha() --------------------------------------
/** \brief Returns true if the string is not empty and all of its characters are alphabetic, or false otherwise. */
[[nodiscard]]
inline const bool isalpha() const noexcept
{
    if (this->empty())
        return false;

    return std::all_of(this->cbegin(), this->cend(), pcs::is_alpha<value_type>);
}
//--- isascii() ---------------------------------------
/** \brief Returns true if all characters in the string are ASCII, or false otherwise. The empty string is considered to be ASCII. */
#ifdef isascii  // may be already defined as a macro in header file <ctype.h>
#undef isascii
#endif
[[nodiscard]]
inline const bool isascii() const noexcept
{
    if (this->empty())
        return true;

    return std::all_of(this->cbegin(), this->cend(), pcs::is_ascii<value_type>);
}
//--- isdecimal() -------------------------------------
/** \brief Returns true if the string is not empty and all of its characters are decimal characters, or false otherwise.
*
* Decimal characters are those that can be used to form numbers in
* base 10, e.g. U+0660, ARABIC-INDIC DIGIT ZERO. Formally a decimal
* character is a character in the Unicode General Category "Nd".
*/
[[nodiscard]]
inline const bool isdecimal() const noexcept
{
    if (this->empty())
        return false;

    return std::all_of(this->cbegin(), this->cend(), pcs::is_decimal<value_type>);
}
//--- isdigit() ---------------------------------------
/** \brief Returns true if all characters in the string are digits and there is at least one character, or false otherwise.
*
* Digits include decimal characters and digits that need special
* handling, such as the compatibility superscript digits. This
* covers digits which cannot be used to form numbers in base 10,
* like the Kharosthi numbers. Formally, a digit is a character
* that has the property value Numeric_Type=Digit or Numeric_Type
* =Decimal.
*
* CAUTION: current implementation of library cpp-strings does
* not implement above algorithm. It just returns the same result
* as 'isdecimal()' which is NOT what Python str library does.
*/
[[nodiscard]]
inline const bool isdigit() const noexcept
{
return isdecimal();
}
//--- isidentifier() ----------------------------------
/** \brief Returns true if the string is not empty and is a valid identifier according to the language definition, or false otherwise.
*
* CAUTION: the current implementation of this method does not deal with the proper c++
* definition of identifiers (see https://en.cppreference.com/w/cpp/language/identifiers
* and https://www.unicode.org/reports/tr31/#Table_Lexical_Classes_for_Identifiers).
*
* While the specification of identifiers in c++ is this one:
*
* identifier ::= XID_Start XID_Continue*
* XID_Start ::= ID_Start XID_Continue*
* ID_Start ::= <characters derived from the Unicode General_Category of uppercase letters, lowercase letters, titlecase letters, modifier letters, other letters, letter numbers, plus Other_ID_Start, minus Pattern_Syntax and Pattern_White_Space code points>
* XID_Continue ::= <characters derived from ID_Continue as per Unicode specs Section 5.1, NFKC Modifications (https://www.unicode.org/reports/tr31/#NFKC_Modifications)>
* ID_Continue ::= ID_Start | <characters having the Unicode General_Category of nonspacing marks, spacing combining marks, decimal number, connector punctuation, plus Other_ID_Continue, minus Pattern_Syntax and Pattern_White_Space code points>
*
* the currently implemented rule is this simpler one:
*
* identifier ::= ID_Start id_continue*
* id_continue ::= ID_Start | decimal_number
*/
[[nodiscard]]
inline const bool isidentifier() const noexcept
{
    if (this->empty())
        return false;

    // the first character must be accepted by pcs::is_id_start()
    if (!pcs::is_id_start((*this)[0]))
        return false;

    // every following character must be accepted by pcs::is_id_continue();
    // for a one-character string the checked range is empty and all_of() is true
    return std::all_of(this->cbegin() + 1, this->cend(), pcs::is_id_continue<value_type>);
}
//--- islower() ---------------------------------------
/** \brief Returns true if all cased characters in the string are lowercase and there is at least one cased character, or false otherwise.
*
* A character is considered here as being cased when it is accepted
* by either pcs::is_lower() or pcs::is_upper(). Uncased characters
* (e.g. digits, whitespaces, punctuation) are ignored, so that a
* string such as "hello world 2" is a lowercase one, while the
* empty string and strings with no cased character are not.
*/
[[nodiscard]]
inline const bool islower() const noexcept
{
    // at least one cased character, which is a lowercase one...
    return std::any_of(this->cbegin(), this->cend(), pcs::is_lower<value_type>) &&
        // ... and no cased character that is an uppercase one
        std::none_of(this->cbegin(), this->cend(), pcs::is_upper<value_type>);
}
//--- isnumeric() -------------------------------------
/** \brief Returns true if all characters in the string are numeric characters, and there is at least one character, or false otherwise.
*
* CAUTION: current implementation just returns isdecimal() result,
* while the description of isnumeric() should be this one:
* Numeric characters include digit characters, and all characters
* that have the Unicode numeric value property. Formally, numeric
* characters are those with the property value Numeric_Type=Digit,
* Numeric_Type=Decimal or Numeric_Type=Numeric.
*/
[[nodiscard]]
inline const bool isnumeric() const noexcept
{
return isdecimal();
}
//--- isprintable() -----------------------------------
/** \brief Returns true if the string is empty or if all of its characters are printable, or false otherwise.
*
* Nonprintable characters are those characters defined in the Unicode
* character database as "Other" or "Separator", excepting the ASCII
* space (0x20) which is considered printable.
*/
[[nodiscard]]
inline const bool isprintable() const noexcept
{
    if (this->empty())
        return true;

    return std::all_of(this->cbegin(), this->cend(), pcs::is_printable<value_type>);
}
//--- ispunctuation() ---------------------------------
/** \brief Returns true if the string is made of exactly one character and if this character belongs to the ASCII punctuation set, or false otherwise. */
[[nodiscard]]
inline const bool ispunctuation() const noexcept
{
    if (this->size() != 1)
        return false;

    return pcs::is_punctuation((*this)[0]);
}
//--- isspace() ---------------------------------------
/** \brief Returns true if the string is not empty and contains whitespace characters only, or false otherwise. */
[[nodiscard]]
inline const bool isspace() const noexcept
{
    if (this->empty())
        return false;

    return std::all_of(this->cbegin(), this->cend(), pcs::is_space<value_type>);
}
//--- istitle() ---------------------------------------
/** \brief Returns true if the string is not empty and is a titlecased string, or false otherwise.
*
* For instance uppercase characters may only follow uncased
* characters and lowercase characters only cased ones.
*
* The check is done by comparing this string with its titlecased
* version, as returned by title().
*
* CAUTION: the current implementation only tests for uppercase
* characters following whitespaces and lowercase characters
* anywhere else.
*/
[[nodiscard]]
inline const bool istitle() const noexcept
{
    if (this->empty())
        return false;

    return this->title() == *this;
}
//--- isupper() ---------------------------------------
/** \brief Returns true if all cased characters in the string are uppercase and there is at least one cased character, or false otherwise.
*
* A character is considered here as being cased when it is accepted
* by either pcs::is_upper() or pcs::is_lower(). Uncased characters
* (e.g. digits, whitespaces, punctuation) are ignored, so that a
* string such as "HELLO WORLD 2" is an uppercase one, while the
* empty string and strings with no cased character are not.
*/
[[nodiscard]]
inline const bool isupper() const noexcept
{
    // at least one cased character, which is an uppercase one...
    return std::any_of(this->cbegin(), this->cend(), pcs::is_upper<value_type>) &&
        // ... and no cased character that is a lowercase one
        std::none_of(this->cbegin(), this->cend(), pcs::is_lower<value_type>);
}
//--- is_words_sep() ----------------------------------
/** \brief Returns true if the string is not empty and contains whitespace and punctuation characters only, or false otherwise. */
[[nodiscard]]
inline const bool is_words_sep() const noexcept
{
    if (this->empty())
        return false;

    const auto is_separator = [](const value_type ch) {
        return pcs::is_space(ch) || pcs::is_punctuation(ch);
    };

    return std::all_of(this->cbegin(), this->cend(), is_separator);
}
//--- join() ------------------------------------------
/** \brief Returns a string which is the concatenation of the strings in the array parameter.
*
* The separator between elements is the string to which this method is applied.
*/
template<const std::size_t N>
[[nodiscard]]
CppStringT join(const std::array<CppStringT, N>& strs) const noexcept