OLD | NEW |
1 # This file is part of Adblock Plus <https://adblockplus.org/>, | 1 # This file is part of Adblock Plus <https://adblockplus.org/>, |
2 # Copyright (C) 2006-2017 eyeo GmbH | 2 # Copyright (C) 2006-2017 eyeo GmbH |
3 # | 3 # |
4 # Adblock Plus is free software: you can redistribute it and/or modify | 4 # Adblock Plus is free software: you can redistribute it and/or modify |
5 # it under the terms of the GNU General Public License version 3 as | 5 # it under the terms of the GNU General Public License version 3 as |
6 # published by the Free Software Foundation. | 6 # published by the Free Software Foundation. |
7 # | 7 # |
8 # Adblock Plus is distributed in the hope that it will be useful, | 8 # Adblock Plus is distributed in the hope that it will be useful, |
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of | 9 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 # GNU General Public License for more details. | 11 # GNU General Public License for more details. |
12 # | 12 # |
13 # You should have received a copy of the GNU General Public License | 13 # You should have received a copy of the GNU General Public License |
14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 14 # along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
15 | 15 |
16 """Helper classes that handle IO for parsing and rendering.""" | 16 """Helper classes that handle IO for filter list parsing and rendering.""" |
17 | 17 |
18 import io | 18 import io |
19 from os import path | 19 from os import path |
20 | 20 |
21 try: | 21 try: |
22 from urllib2 import urlopen, HTTPError | 22 from urllib2 import urlopen, HTTPError |
23 except ImportError: # The module was renamed in Python 3. | 23 except ImportError: # The module was renamed in Python 3. |
24 from urllib.request import urlopen | 24 from urllib.request import urlopen |
25 from urllib.error import HTTPError | 25 from urllib.error import HTTPError |
26 | 26 |
27 __all__ = ['FSSource', 'TopSource', 'WebSource', 'NotFound'] | 27 __all__ = ['NotFound', 'FSSource', 'TopSource', 'WebSource'] |
28 | 28 |
29 | 29 |
class NotFound(Exception):
    """Raised when a requested file is missing from a source.

    The source has no file with the requested name. When the request
    comes from an include, the including list most likely contains an
    error.
    """
36 | 36 |
37 | 37 |
class FSSource(object):
    """Directory on the filesystem.

    Parameters
    ----------
    root_path : str
        The path to the directory.
    encoding : str
        Encoding to use for reading the files (default: utf-8).

    """

    is_inheritable = True

    def __init__(self, root_path, encoding='utf-8'):
        # Stored as an absolute path so that the containment check in
        # `_resolve_path` compares two normalized absolute paths.
        self.root_path = path.abspath(root_path)
        self.encoding = encoding

    def _resolve_path(self, path_in_source):
        """Convert a '/'-separated source path to an absolute path.

        Parameters
        ----------
        path_in_source : str
            Path to the file inside of the source, using '/' as separator.

        Returns
        -------
        str
            Absolute filesystem path of the file.

        Raises
        ------
        ValueError
            If the resulting path is outside of the root directory.

        """
        parts = path_in_source.split('/')
        full_path = path.abspath(path.join(self.root_path, *parts))
        # A plain `startswith(root)` would also accept sibling directories
        # that merely share the prefix (root "/a/b" vs "/a/b-private/x"),
        # so the prefix must end with a path separator. Stripping first
        # keeps a root like "/" from turning into "//".
        root_prefix = self.root_path.rstrip(path.sep) + path.sep
        inside_root = (full_path == self.root_path or
                       full_path.startswith(root_prefix))
        if not inside_root:
            raise ValueError("Invalid path: '{}'".format(path_in_source))
        return full_path

    def get(self, path_in_source):
        """Read file from the source.

        This is a generator: the file is opened (and any error is raised)
        only when iteration starts.

        Parameters
        ----------
        path_in_source : str
            Path to the file inside of the source.

        Returns
        -------
        iterable of str
            Lines of the file with trailing whitespace removed.

        Raises
        ------
        NotFound
            If the file doesn't exist.
        ValueError
            If the path points outside of the root directory.

        """
        full_path = self._resolve_path(path_in_source)
        try:
            with io.open(full_path, encoding=self.encoding) as open_file:
                for line in open_file:
                    yield line.rstrip()
        except IOError as exc:
            if exc.errno == 2:  # ENOENT: no such file or directory.
                raise NotFound("File not found: '{}'".format(full_path))
            # Any other IO error propagates with its original traceback.
            raise
69 | 87 |
70 | 88 |
class TopSource(FSSource):
    """Current directory without path conversion.

    Also supports absolute paths. This source is used for the top fragment.

    Parameters
    ----------
    encoding : str
        Encoding to use for reading the files (default: utf-8).

    """

    is_inheritable = False

    def __init__(self, encoding='utf-8'):
        # The root of this source is always the current directory.
        super(TopSource, self).__init__(root_path='.', encoding=encoding)

    def _resolve_path(self, path_in_source):
        # No conversion and no containment check: the path is returned
        # exactly as given.
        return path_in_source
86 | 108 |
87 | 109 |
class WebSource(object):
    """Handler for http or https.

    Parameters
    ----------
    protocol : str
        Protocol to use: "http" or "https".
    default_encoding : str
        Encoding to use when the server doesn't specify it (default: utf-8).

    """

    is_inheritable = False

    def __init__(self, protocol, default_encoding='utf-8'):
        self.protocol = protocol
        self.default_encoding = default_encoding

    def get(self, path_in_source):
        """Read file from the source.

        This is a generator: the request is made (and any error is raised)
        only when iteration starts.

        Parameters
        ----------
        path_in_source : str
            The rest of the URL after "http(s):".

        Returns
        -------
        iterable of str
            Lines of the file, decoded and with trailing whitespace removed.

        Raises
        ------
        NotFound
            If the server responds with HTTP 404.
        HTTPError
            For any other HTTP error status.

        """
        url = '{}:{}'.format(self.protocol, path_in_source)
        try:
            response = urlopen(url)
            try:
                info = response.info()
                # info.getparam became info.get_param in Python 3 so we'll
                # try both.
                get_param = (getattr(info, 'get_param', None) or
                             getattr(info, 'getparam', None))
                encoding = get_param('charset') or self.default_encoding
                for line in response:
                    yield line.decode(encoding).rstrip()
            finally:
                # Release the connection even if decoding fails or the
                # consumer stops iterating early.
                response.close()
        except HTTPError as err:
            if err.code == 404:
                raise NotFound("HTTP 404 Not found: '{}'".format(url))
            # Other HTTP errors propagate with their original traceback.
            raise
OLD | NEW |