Index: src/plugin/TokenSequence.h |
=================================================================== |
new file mode 100644 |
--- /dev/null |
+++ b/src/plugin/TokenSequence.h |
@@ -0,0 +1,163 @@ |
+/* |
+ * This file is part of Adblock Plus <https://adblockplus.org/>, |
+ * Copyright (C) 2006-2015 Eyeo GmbH |
+ * |
+ * Adblock Plus is free software: you can redistribute it and/or modify |
+ * it under the terms of the GNU General Public License version 3 as |
+ * published by the Free Software Foundation. |
+ * |
+ * Adblock Plus is distributed in the hope that it will be useful, |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
+ * GNU General Public License for more details. |
+ * |
+ * You should have received a copy of the GNU General Public License |
+ * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
+ */ |
+ |
+template<class T> class TokenSequenceConstIterator; // forward declaration |
+ |
+/** |
+ * A sequence of delimited tokens contained within a given string. |
+ */ |
+template<class T> |
+class TokenSequence |
+{ |
+ typedef TokenSequenceConstIterator<T> ConstIterator; |
sergei
2016/01/25 14:13:52
I would like to have it as a public member and nam
Eric
2016/01/26 15:54:19
When our style guide at https://adblockplus.org/en
sergei
2016/01/26 22:34:58
I thought that it's natural to adhere to common it
|
+ friend class ConstIterator; |
+ const T delimiters; |
+ const T value; |
+ |
+ /** |
+ * Default constructor is deleted |
+ */ |
+ TokenSequence(); // = delete |
+ |
+public: |
+ TokenSequence(const T& value, const T& delimiters) |
+ : value(value), delimiters(delimiters) |
+ { |
+ if (delimiters.empty()) |
+ { |
+ // If we don't have delimiter characters, the 'first_first_*' functions will have undefined behavior. |
+ throw std::runtime_error("Must have at least one delimiter character"); |
+ } |
+ } |
+ |
+ ConstIterator cbegin() const |
+ { |
+ return ConstIterator(*this, ConstIterator::BeginMarker()); |
+ } |
+ |
+ ConstIterator cend() const |
+ { |
+ return ConstIterator(*this, ConstIterator::EndMarker()); |
+ } |
+}; |
+ |
+/** |
+ * Iterator class for TokenSequence<T> |
+ * |
+ * All constructors are private, only available to TokenSequence<T> through a friend declaration. |
+ * |
+ * Invariant |
+ * if posFirstCharInToken == npos |
+ * iterator is "end()" |
+ * else |
+ * 0 <= posFirstCharInToken < posFirstCharNotInToken <= length of underlying container string |
+ * first token position = posFirstCharInToken |
+ * token length = posFirstCharNotInToken - posFirstCharInToken |
+ */ |
+template<class T> |
+class TokenSequenceConstIterator |
+{ |
+ typedef TokenSequence<T> Container; |
+ friend class Container; |
+ const Container& sequence; |
sergei
2016/01/25 14:13:54
I'm pretty sure it generates a warning something l
Eric
2016/01/26 15:54:20
I am completely sure it does not generate a warnin
sergei
2016/01/26 22:34:57
OK, our warning level does not generate a warning
|
+ size_t posFirstCharInToken; |
sergei
2016/01/25 14:13:54
Wouldn't it better to name it as `tokenFirstCharPo
Eric
2016/01/26 15:54:20
It's "position of the first character in the token
sergei
2016/01/26 22:34:57
Let's say there is a string "aaa bbb ccc", after t
|
+ size_t posFirstCharNotInToken; |
sergei
2016/01/25 14:13:53
And this one is a position of the next delimiter,
Eric
2016/01/26 15:54:21
Your suggested name is not as precise as the curre
sergei
2016/01/26 22:34:58
That's clear that it's a right-open interval, BTW,
|
+ |
+ class BeginMarker {}; |
+ class EndMarker {}; |
+ |
+ /** |
+ * Default constructor is deleted |
+ */ |
+ TokenSequenceConstIterator(); // = delete |
+ |
+ /** |
+ * Constructor for cbegin() |
+ */ |
+ TokenSequenceConstIterator(const TokenSequence<T>& sequence, BeginMarker) |
+ : sequence(sequence) |
+ { |
+ Advance(0); |
+ } |
+ |
+ /** |
+ * Constructor for cend() |
+ */ |
+ TokenSequenceConstIterator(const TokenSequence<T>& sequence, EndMarker) |
+ : sequence(sequence), posFirstCharInToken(T::npos) |
sergei
2016/01/25 14:13:54
I feel myself uncomfortable when not all members w
Eric
2016/01/26 15:54:20
If you would like a comment about why 'posFirstCha
sergei
2016/01/26 22:34:57
Don't forget about readability and maintainability
|
+ { |
+ } |
+ |
+ /** |
+ * Find the next token beginning at or after the given position |
+ */ |
+ void Advance(size_t posStart) |
sergei
2016/01/25 14:13:54
Wouldn't it be better to rename it to something li
Eric
2016/01/26 15:54:19
It's better to name functions "what they do" than
sergei
2016/01/26 22:34:57
"advance to the next token" actually means `find n
|
+ { |
+ posFirstCharInToken = sequence.value.find_first_not_of(sequence.delimiters, posStart); |
sergei
2016/01/25 14:13:53
That line "eats" all delimiters and skips empty to
Eric
2016/01/26 15:54:19
From my point of view, it's completely conventiona
sergei
2016/01/26 22:34:57
Why not to use it with comma as a delimiter and ho
|
+ if (posFirstCharInToken == T::npos) |
+ { |
+ // If we've become an "end()" iterator, we're done. |
+ return; |
+ } |
+ posFirstCharNotInToken = sequence.value.find_first_of(sequence.delimiters, posFirstCharInToken + 1); |
+ if (posFirstCharNotInToken == T::npos) |
+ { |
+ posFirstCharNotInToken = sequence.value.length(); |
+ } |
+ } |
+ |
+public: |
+ bool operator==(const TokenSequenceConstIterator& other) const |
+ { |
+ if (&sequence != &other.sequence) |
+ { |
+ return false; // iterators to different sequences cannot be equal |
+ } |
+ // Iterators with the same initial token position will also have the same final final position |
+ // "end()" iterators use "npos" as their position |
+ return posFirstCharInToken == other.posFirstCharInToken; |
+ } |
+ |
+ bool operator!=(const TokenSequenceConstIterator& other) const |
+ { |
+ return !operator==(other); |
+ } |
+ |
+ void operator++() |
sergei
2016/01/25 14:13:53
It seems we are not using it, but it can be very c
Eric
2016/01/26 15:54:20
Right. We're not using it. So no need for a return
|
+ { |
+ if (posFirstCharInToken == T::npos) |
+ { |
+ return; // Incrementing "end()" yields "end()" |
sergei
2016/01/25 14:13:52
Should we throw an exception here instead?
Eric
2016/01/26 15:54:20
In fact, the way that this code is used in 'for' l
sergei
2016/01/26 22:34:58
It defensive but it protects silently, if we are t
|
+ } |
+ if (posFirstCharNotInToken >= sequence.value.length()) |
+ { |
+ // We are already at the end of the sequence. No need to search more. |
+ posFirstCharInToken = T::npos; |
sergei
2016/01/25 14:13:53
I'm not sure that we actually need to make an assi
Eric
2016/01/26 15:54:19
The very simple answer is that we're incrementing
sergei
2016/01/26 22:34:58
Sorry, I have overlooked it, I have wrongly read `
|
+ return; |
sergei
2016/01/25 14:13:54
Should we rather throw an exception?
Eric
2016/01/26 15:54:20
Never. Incrementing to "end()" is not an error.
|
+ } |
+ Advance(posFirstCharNotInToken + 1); |
+ } |
+ |
+ T operator*() const |
+ { |
+ if (posFirstCharInToken == T::npos) |
+ { |
+ throw std::runtime_error("Cannot dereference end() iterator"); |
+ } |
+ return sequence.value.substr(posFirstCharInToken, posFirstCharNotInToken - posFirstCharInToken); |
+ } |
+}; |