| Index: src/plugin/TokenSequence.h | 
| =================================================================== | 
| new file mode 100644 | 
| --- /dev/null | 
| +++ b/src/plugin/TokenSequence.h | 
| @@ -0,0 +1,163 @@ | 
| +/* | 
| + * This file is part of Adblock Plus <https://adblockplus.org/>, | 
| + * Copyright (C) 2006-2015 Eyeo GmbH | 
| + * | 
| + * Adblock Plus is free software: you can redistribute it and/or modify | 
| + * it under the terms of the GNU General Public License version 3 as | 
| + * published by the Free Software Foundation. | 
| + * | 
| + * Adblock Plus is distributed in the hope that it will be useful, | 
| + * but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 
| + * GNU General Public License for more details. | 
| + * | 
| + * You should have received a copy of the GNU General Public License | 
| + * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 
| + */ | 
| + | 
| +template<class T> class TokenSequenceConstIterator; // forward declaration | 
| + | 
| +/** | 
| + * A sequence of delimited tokens contained within a given string. | 
| + */ | 
| +template<class T> | 
| +class TokenSequence | 
| +{ | 
| + typedef TokenSequenceConstIterator<T> ConstIterator; | 
| 
 
sergei
2016/01/25 14:13:52
I would like to have it as a public member and nam
 
Eric
2016/01/26 15:54:19
When our style guide at https://adblockplus.org/en
 
sergei
2016/01/26 22:34:58
I thought that it's natural to adhere to common it
 
 | 
| + friend class ConstIterator; | 
| + const T delimiters; | 
| + const T value; | 
| + | 
| + /** | 
| + * Default constructor is deleted | 
| + */ | 
| + TokenSequence(); // = delete | 
| + | 
| +public: | 
| + TokenSequence(const T& value, const T& delimiters) | 
| + : value(value), delimiters(delimiters) | 
| + { | 
| + if (delimiters.empty()) | 
| + { | 
| + // If we don't have delimiter characters, the 'first_first_*' functions will have undefined behavior. | 
| + throw std::runtime_error("Must have at least one delimiter character"); | 
| + } | 
| + } | 
| + | 
| + ConstIterator cbegin() const | 
| + { | 
| + return ConstIterator(*this, ConstIterator::BeginMarker()); | 
| + } | 
| + | 
| + ConstIterator cend() const | 
| + { | 
| + return ConstIterator(*this, ConstIterator::EndMarker()); | 
| + } | 
| +}; | 
| + | 
| +/** | 
| + * Iterator class for TokenSequence<T> | 
| + * | 
| + * All constructors are private, only available to TokenSequence<T> through a friend declaration. | 
| + * | 
| + * Invariant | 
| + * if posFirstCharInToken == npos | 
| + * iterator is "end()" | 
| + * else | 
| + * 0 <= posFirstCharInToken < posFirstCharNotInToken <= length of underlying container string | 
| + * first token position = posFirstCharInToken | 
| + * token length = posFirstCharNotInToken - posFirstCharInToken | 
| + */ | 
| +template<class T> | 
| +class TokenSequenceConstIterator | 
| +{ | 
| + typedef TokenSequence<T> Container; | 
| + friend class Container; | 
| + const Container& sequence; | 
| 
 
sergei
2016/01/25 14:13:54
I'm pretty sure it generates a warning something l
 
Eric
2016/01/26 15:54:20
I am completely sure it does not generate a warnin
 
sergei
2016/01/26 22:34:57
OK, our warning level does not generate a warning
 
 | 
| + size_t posFirstCharInToken; | 
| 
 
sergei
2016/01/25 14:13:54
Wouldn't it better to name it as `tokenFirstCharPo
 
Eric
2016/01/26 15:54:20
It's "position of the first character in the token
 
sergei
2016/01/26 22:34:57
Let's say there is a string "aaa bbb ccc", after t
 
 | 
| + size_t posFirstCharNotInToken; | 
| 
 
sergei
2016/01/25 14:13:53
And this one is a position of the next delimiter,
 
Eric
2016/01/26 15:54:21
Your suggested name is not as precise as the curre
 
sergei
2016/01/26 22:34:58
That's clear that it's a right-open interval, BTW,
 
 | 
| + | 
| + class BeginMarker {}; | 
| + class EndMarker {}; | 
| + | 
| + /** | 
| + * Default constructor is deleted | 
| + */ | 
| + TokenSequenceConstIterator(); // = delete | 
| + | 
| + /** | 
| + * Constructor for cbegin() | 
| + */ | 
| + TokenSequenceConstIterator(const TokenSequence<T>& sequence, BeginMarker) | 
| + : sequence(sequence) | 
| + { | 
| + Advance(0); | 
| + } | 
| + | 
| + /** | 
| + * Constructor for cend() | 
| + */ | 
| + TokenSequenceConstIterator(const TokenSequence<T>& sequence, EndMarker) | 
| + : sequence(sequence), posFirstCharInToken(T::npos) | 
| 
 
sergei
2016/01/25 14:13:54
I feel myself uncomfortable when not all members w
 
Eric
2016/01/26 15:54:20
If you would like a comment about why 'posFirstCha
 
sergei
2016/01/26 22:34:57
Don't forget about readability and maintainability
 
 | 
| + { | 
| + } | 
| + | 
| + /** | 
| + * Find the next token beginning at or after the given position | 
| + */ | 
| + void Advance(size_t posStart) | 
| 
 
sergei
2016/01/25 14:13:54
Wouldn't it be better to rename it to something li
 
Eric
2016/01/26 15:54:19
It's better to name functions "what they do" than
 
sergei
2016/01/26 22:34:57
"advance to the next token" actually means `find n
 
 | 
| + { | 
| + posFirstCharInToken = sequence.value.find_first_not_of(sequence.delimiters, posStart); | 
| 
 
sergei
2016/01/25 14:13:53
That line "eats" all delimiters and skips empty to
 
Eric
2016/01/26 15:54:19
From my point of view, it's completely conventiona
 
sergei
2016/01/26 22:34:57
Why not to use it with comma as a delimiter and ho
 
 | 
| + if (posFirstCharInToken == T::npos) | 
| + { | 
| + // If we've become an "end()" iterator, we're done. | 
| + return; | 
| + } | 
| + posFirstCharNotInToken = sequence.value.find_first_of(sequence.delimiters, posFirstCharInToken + 1); | 
| + if (posFirstCharNotInToken == T::npos) | 
| + { | 
| + posFirstCharNotInToken = sequence.value.length(); | 
| + } | 
| + } | 
| + | 
| +public: | 
| + bool operator==(const TokenSequenceConstIterator& other) const | 
| + { | 
| + if (&sequence != &other.sequence) | 
| + { | 
| + return false; // iterators to different sequences cannot be equal | 
| + } | 
| + // Iterators with the same initial token position will also have the same final final position | 
| + // "end()" iterators use "npos" as their position | 
| + return posFirstCharInToken == other.posFirstCharInToken; | 
| + } | 
| + | 
| + bool operator!=(const TokenSequenceConstIterator& other) const | 
| + { | 
| + return !operator==(other); | 
| + } | 
| + | 
| + void operator++() | 
| 
 
sergei
2016/01/25 14:13:53
It seems we are not using it, but it can be very c
 
Eric
2016/01/26 15:54:20
Right. We're not using it. So no need for a return
 
 | 
| + { | 
| + if (posFirstCharInToken == T::npos) | 
| + { | 
| + return; // Incrementing "end()" yields "end()" | 
| 
 
sergei
2016/01/25 14:13:52
Should we throw an exception here instead?
 
Eric
2016/01/26 15:54:20
In fact, the way that this code is used in 'for' l
 
sergei
2016/01/26 22:34:58
It defensive but it protects silently, if we are t
 
 | 
| + } | 
| + if (posFirstCharNotInToken >= sequence.value.length()) | 
| + { | 
| + // We are already at the end of the sequence. No need to search more. | 
| + posFirstCharInToken = T::npos; | 
| 
 
sergei
2016/01/25 14:13:53
I'm not sure that we actually need to make an assi
 
Eric
2016/01/26 15:54:19
The very simple answer is that we're incrementing
 
sergei
2016/01/26 22:34:58
Sorry, I have overlooked it, I have wrongly read `
 
 | 
| + return; | 
| 
 
sergei
2016/01/25 14:13:54
Should we rather throw an exception?
 
Eric
2016/01/26 15:54:20
Never. Incrementing to "end()" is not an error.
 
 | 
| + } | 
| + Advance(posFirstCharNotInToken + 1); | 
| + } | 
| + | 
| + T operator*() const | 
| + { | 
| + if (posFirstCharInToken == T::npos) | 
| + { | 
| + throw std::runtime_error("Cannot dereference end() iterator"); | 
| + } | 
| + return sequence.value.substr(posFirstCharInToken, posFirstCharNotInToken - posFirstCharInToken); | 
| + } | 
| +}; |