| Index: src/plugin/TokenSequence.h |
| =================================================================== |
| new file mode 100644 |
| --- /dev/null |
| +++ b/src/plugin/TokenSequence.h |
| @@ -0,0 +1,163 @@ |
| +/* |
| + * This file is part of Adblock Plus <https://adblockplus.org/>, |
| + * Copyright (C) 2006-2015 Eyeo GmbH |
| + * |
| + * Adblock Plus is free software: you can redistribute it and/or modify |
| + * it under the terms of the GNU General Public License version 3 as |
| + * published by the Free Software Foundation. |
| + * |
| + * Adblock Plus is distributed in the hope that it will be useful, |
| + * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| + * GNU General Public License for more details. |
| + * |
| + * You should have received a copy of the GNU General Public License |
| + * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
| + */ |
| + |
| +template<class T> class TokenSequenceConstIterator; // forward declaration |
| + |
| +/** |
| + * A sequence of delimited tokens contained within a given string. |
| + */ |
| +template<class T> |
| +class TokenSequence |
| +{ |
| + typedef TokenSequenceConstIterator<T> ConstIterator; |
|
sergei
2016/01/25 14:13:52
I would like to have it as a public member and nam
Eric
2016/01/26 15:54:19
When our style guide at https://adblockplus.org/en
sergei
2016/01/26 22:34:58
I thought that it's natural to adhere to common it
|
| + friend class ConstIterator; |
| + const T delimiters; |
| + const T value; |
| + |
| + /** |
| + * Default constructor is deleted |
| + */ |
| + TokenSequence(); // = delete |
| + |
| +public: |
| + TokenSequence(const T& value, const T& delimiters) |
| + : value(value), delimiters(delimiters) |
| + { |
| + if (delimiters.empty()) |
| + { |
| + // If we don't have delimiter characters, the 'first_first_*' functions will have undefined behavior. |
| + throw std::runtime_error("Must have at least one delimiter character"); |
| + } |
| + } |
| + |
| + ConstIterator cbegin() const |
| + { |
| + return ConstIterator(*this, ConstIterator::BeginMarker()); |
| + } |
| + |
| + ConstIterator cend() const |
| + { |
| + return ConstIterator(*this, ConstIterator::EndMarker()); |
| + } |
| +}; |
| + |
| +/** |
| + * Iterator class for TokenSequence<T> |
| + * |
| + * All constructors are private, only available to TokenSequence<T> through a friend declaration. |
| + * |
| + * Invariant |
| + * if posFirstCharInToken == npos |
| + * iterator is "end()" |
| + * else |
| + * 0 <= posFirstCharInToken < posFirstCharNotInToken <= length of underlying container string |
| + * first token position = posFirstCharInToken |
| + * token length = posFirstCharNotInToken - posFirstCharInToken |
| + */ |
| +template<class T> |
| +class TokenSequenceConstIterator |
| +{ |
| + typedef TokenSequence<T> Container; |
| + friend class Container; |
| + const Container& sequence; |
|
sergei
2016/01/25 14:13:54
I'm pretty sure it generates a warning something l
Eric
2016/01/26 15:54:20
I am completely sure it does not generate a warnin
sergei
2016/01/26 22:34:57
OK, our warning level does not generate a warning
|
| + size_t posFirstCharInToken; |
|
sergei
2016/01/25 14:13:54
Wouldn't it better to name it as `tokenFirstCharPo
Eric
2016/01/26 15:54:20
It's "position of the first character in the token
sergei
2016/01/26 22:34:57
Let's say there is a string "aaa bbb ccc", after t
|
| + size_t posFirstCharNotInToken; |
|
sergei
2016/01/25 14:13:53
And this one is a position of the next delimiter,
Eric
2016/01/26 15:54:21
Your suggested name is not as precise as the curre
sergei
2016/01/26 22:34:58
That's clear that it's a right-open interval, BTW,
|
| + |
| + class BeginMarker {}; |
| + class EndMarker {}; |
| + |
| + /** |
| + * Default constructor is deleted |
| + */ |
| + TokenSequenceConstIterator(); // = delete |
| + |
| + /** |
| + * Constructor for cbegin() |
| + */ |
| + TokenSequenceConstIterator(const TokenSequence<T>& sequence, BeginMarker) |
| + : sequence(sequence) |
| + { |
| + Advance(0); |
| + } |
| + |
| + /** |
| + * Constructor for cend() |
| + */ |
| + TokenSequenceConstIterator(const TokenSequence<T>& sequence, EndMarker) |
| + : sequence(sequence), posFirstCharInToken(T::npos) |
|
sergei
2016/01/25 14:13:54
I feel myself uncomfortable when not all members w
Eric
2016/01/26 15:54:20
If you would like a comment about why 'posFirstCha
sergei
2016/01/26 22:34:57
Don't forget about readability and maintainability
|
| + { |
| + } |
| + |
| + /** |
| + * Find the next token beginning at or after the given position |
| + */ |
| + void Advance(size_t posStart) |
|
sergei
2016/01/25 14:13:54
Wouldn't it be better to rename it to something li
Eric
2016/01/26 15:54:19
It's better to name functions "what they do" than
sergei
2016/01/26 22:34:57
"advance to the next token" actually means `find n
|
| + { |
| + posFirstCharInToken = sequence.value.find_first_not_of(sequence.delimiters, posStart); |
|
sergei
2016/01/25 14:13:53
That line "eats" all delimiters and skips empty to
Eric
2016/01/26 15:54:19
From my point of view, it's completely conventiona
sergei
2016/01/26 22:34:57
Why not to use it with comma as a delimiter and ho
|
| + if (posFirstCharInToken == T::npos) |
| + { |
| + // If we've become an "end()" iterator, we're done. |
| + return; |
| + } |
| + posFirstCharNotInToken = sequence.value.find_first_of(sequence.delimiters, posFirstCharInToken + 1); |
| + if (posFirstCharNotInToken == T::npos) |
| + { |
| + posFirstCharNotInToken = sequence.value.length(); |
| + } |
| + } |
| + |
| +public: |
| + bool operator==(const TokenSequenceConstIterator& other) const |
| + { |
| + if (&sequence != &other.sequence) |
| + { |
| + return false; // iterators to different sequences cannot be equal |
| + } |
| + // Iterators with the same initial token position will also have the same final final position |
| + // "end()" iterators use "npos" as their position |
| + return posFirstCharInToken == other.posFirstCharInToken; |
| + } |
| + |
| + bool operator!=(const TokenSequenceConstIterator& other) const |
| + { |
| + return !operator==(other); |
| + } |
| + |
| + void operator++() |
|
sergei
2016/01/25 14:13:53
It seems we are not using it, but it can be very c
Eric
2016/01/26 15:54:20
Right. We're not using it. So no need for a return
|
| + { |
| + if (posFirstCharInToken == T::npos) |
| + { |
| + return; // Incrementing "end()" yields "end()" |
|
sergei
2016/01/25 14:13:52
Should we throw an exception here instead?
Eric
2016/01/26 15:54:20
In fact, the way that this code is used in 'for' l
sergei
2016/01/26 22:34:58
It defensive but it protects silently, if we are t
|
| + } |
| + if (posFirstCharNotInToken >= sequence.value.length()) |
| + { |
| + // We are already at the end of the sequence. No need to search more. |
| + posFirstCharInToken = T::npos; |
|
sergei
2016/01/25 14:13:53
I'm not sure that we actually need to make an assi
Eric
2016/01/26 15:54:19
The very simple answer is that we're incrementing
sergei
2016/01/26 22:34:58
Sorry, I have overlooked it, I have wrongly read `
|
| + return; |
|
sergei
2016/01/25 14:13:54
Should we rather throw an exception?
Eric
2016/01/26 15:54:20
Never. Incrementing to "end()" is not an error.
|
| + } |
| + Advance(posFirstCharNotInToken + 1); |
| + } |
| + |
| + T operator*() const |
| + { |
| + if (posFirstCharInToken == T::npos) |
| + { |
| + throw std::runtime_error("Cannot dereference end() iterator"); |
| + } |
| + return sequence.value.substr(posFirstCharInToken, posFirstCharNotInToken - posFirstCharInToken); |
| + } |
| +}; |