 Issue 29342824:
  Issue 4044 - Added handling for __future__ unicode_literals import to check_quotes()  (Closed)
    
  
    Issue 29342824:
  Issue 4044 - Added handling for __future__ unicode_literals import to check_quotes()  (Closed) 
  | Index: flake8-abp/flake8_abp.py | 
| =================================================================== | 
| --- a/flake8-abp/flake8_abp.py | 
| +++ b/flake8-abp/flake8_abp.py | 
| @@ -39,16 +39,18 @@ | 
| } | 
| ESSENTIAL_BUILTINS = set(dir(builtins)) - {'apply', 'buffer', 'coerce', | 
| 'intern', 'file'} | 
| LEAVE_BLOCK = (ast.Return, ast.Raise, ast.Continue, ast.Break) | 
| VOLATILE = object() | 
| +IS_UNICODE_LITERALS = False | 
| 
Sebastian Noack
2016/05/25 08:31:36
Uppercase notation is only for variables that are
 | 
| + | 
| def evaluate(node): | 
| try: | 
| return eval(compile(ast.Expression(node), '', 'eval'), {}) | 
| except Exception: | 
| return VOLATILE | 
| @@ -372,28 +374,31 @@ | 
| return (0, 'A303 non-default file encoding') | 
| check_non_default_encoding.name = 'abp-non-default-encoding' | 
| check_non_default_encoding.version = __version__ | 
| def check_quotes(logical_line, tokens, previous_logical): | 
| first_token = True | 
| - global is_unicode_literals | 
| + global IS_UNICODE_LITERALS | 
| + | 
| + # --- check if this is beginning of file | 
| 
Sebastian Noack
2016/05/25 08:31:36
We generally don't use --- in comments. So please re
 
Vasily Kuznetsov
2016/05/25 13:55:31
Also pep8 recommends capitalising the first letter
 | 
| + if tokens[0][3][0] == 1: | 
| + IS_UNICODE_LITERALS = False | 
| + | 
| + # --- check if in unicode_literals mode | 
| + token_strings = [t[1] for t in tokens] | 
| 
Sebastian Noack
2016/05/25 08:31:35
I wonder whether we should also check for the toke
 
Vasily Kuznetsov
2016/05/25 13:55:32
Do we really need to? Can you imagine a line that
 
Sebastian Noack
2016/05/25 14:45:29
I feel that checking for the token type is more co
 
Vasily Kuznetsov
2016/05/25 16:25:56
I see, it does feel a bit sloppy to just check the
 | 
| + if token_strings[:3] == ['from', '__future__', 'import']: | 
| + IS_UNICODE_LITERALS = 'unicode_literals' in token_strings | 
| 
Vasily Kuznetsov
2016/05/25 13:55:32
Won't this break if it gets a piece of code like t
 | 
| for kind, token, start, end, _ in tokens: | 
| if kind == tokenize.INDENT or kind == tokenize.DEDENT: | 
| continue | 
| - if start[0] == 1: | 
| - is_unicode_literals = False | 
| - | 
| - if logical_line == 'from __future__ import unicode_literals': | 
| - is_unicode_literals = True | 
| - | 
| if kind == tokenize.STRING: | 
| match = re.search(r'^(u)?(b)?(r)?((""")?.*)$', | 
| token, re.IGNORECASE | re.DOTALL) | 
| (is_unicode, is_bytes, is_raw, | 
| literal, has_doc_quotes) = match.groups() | 
| if first_token and re.search(r'^(?:(?:def|class)\s|$)', | 
| previous_logical): | 
| @@ -402,21 +407,24 @@ | 
| 'quotes for docstrings') | 
| elif is_unicode or is_bytes or is_raw: | 
| yield (start, "A109 don't use u'', b'' " | 
| "or r'' for doc strings") | 
| elif start[0] == end[0]: | 
| if is_raw: | 
| literal = re.sub(r'\\(?!{})'.format(literal[0]), | 
| '\\\\\\\\', literal) | 
| - | 
| if sys.version_info[0] >= 3: | 
| if is_bytes: | 
| literal = 'b' + literal | 
| - elif is_unicode or is_unicode_literals: | 
| + elif is_unicode and not IS_UNICODE_LITERALS: | 
| 
Sebastian Noack
2016/05/25 08:31:35
It seems the check for IS_UNICODE_LITERALS is inco
 
Vasily Kuznetsov
2016/05/25 13:55:32
I second that it should come before the Python 3 c
 
Sebastian Noack
2016/05/25 14:45:29
Just an idea, how about moving it even above the c
 
Vasily Kuznetsov
2016/05/25 16:25:56
Moving the check to above the check for docstrings
 | 
| + yield(start, 'A112 use "from __future__ import"' | 
| 
Sebastian Noack
2016/05/25 08:31:35
There should be a space after "yield". Otherwise i
 
Sebastian Noack
2016/05/25 08:31:35
Please document A112 in the README.
 | 
| + 'unicode_literals instead of prefixing' | 
| 
Sebastian Noack
2016/05/25 08:31:35
Please indent long error messages like it's done i
 | 
| + 'literals with "u"') | 
| + elif not is_bytes: | 
| literal = 'u' + literal | 
| if ascii(eval(literal)) != literal: | 
| yield (start, "A110 string literal doesn't match " | 
| '{}()'.format(ascii.__name__)) | 
| first_token = False |