Issue 29349885: Issue 4340 - Drop dependency on external xar tool (Closed)
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import re
import struct
import time
import zlib

from Crypto.Hash import SHA
from Crypto.PublicKey import RSA
from Crypto.Signature import PKCS1_v1_5

from buildtools.packager import getTemplate

XAR_HEADER_MAGIC = 0x78617221
XAR_HEADER_SIZE = 28
XAR_VERSION = 1
XAR_CKSUM_SHA1 = 1

PRIVATE_KEY_REGEXP = r'-+BEGIN PRIVATE KEY-+(.*?)-+END PRIVATE KEY-+'
CERTIFICATE_REGEXP = r'-+BEGIN CERTIFICATE-+(.*?)-+END CERTIFICATE-+'


def read_key(keyfile):
    with open(keyfile, 'r') as file:
        data = file.read()
        match = re.search(PRIVATE_KEY_REGEXP, data, re.S)
Sebastian Noack
2016/08/17 12:53:45
You can leave the with block after the data has been read.

Wladimir Palant
2016/08/17 14:11:40
Done.
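        # Sketch of the suggestion above, presumably what the later patch set
        # does: leave the with block once the data has been read, then do the
        # matching outside of it:
        #
        #   with open(keyfile, 'r') as file:
        #       data = file.read()
        #   match = re.search(PRIVATE_KEY_REGEXP, data, re.S)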
        if not match:
            raise Exception('Could not find private key in file')
        return RSA.importKey(match.group(0))


def read_certificates(keyfile):
    certificates = []
    with open(keyfile, 'r') as file:
        data = file.read()
        for match in re.finditer(CERTIFICATE_REGEXP, data, re.S):
            certificates.append(re.sub(r'\s+', '', match.group(1)))
    return certificates


def get_checksum(data):
    return SHA.new(data).digest()


def get_hexchecksum(data):
    return SHA.new(data).hexdigest()


def get_signature(key, data):
    return PKCS1_v1_5.new(key).sign(SHA.new(data))
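These helpers wrap PyCrypto's SHA-1 and PKCS#1 v1.5 primitives. As a hedged aside, a counterpart for checking an archive's signature could look like this (verify() is part of the same PyCrypto API; the function name is illustrative, not part of this patch):

    def verify_signature(key, data, signature):
        # True if signature is a valid PKCS#1 v1.5 signature over data
        return PKCS1_v1_5.new(key).verify(SHA.new(data), signature)
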
def compress_files(filedata, root, offset):
    compressed_data = []
    filedata = sorted(filedata.iteritems())
    directory_stack = [('', root)]
    file_id = 1
    for path, data in filedata:
        # Remove directories that are done
        while not path.startswith(directory_stack[-1][0]):
            directory_stack.pop()

        # Add new directories
        directory_path = directory_stack[-1][0]
        relpath = path[len(directory_path):]
        while '/' in relpath:
            name, relpath = relpath.split('/', 1)
            directory_path += name + '/'
            directory = {
                'id': file_id,
                'name': name,
                'type': 'directory',
                'mode': '0755',
                'children': [],
            }
            file_id += 1
            directory_stack[-1][1].append(directory)
            directory_stack.append((directory_path, directory['children']))

        # Add the actual file
        compressed = zlib.compress(data, 9)
        file = {
            'id': file_id,
            'name': relpath,
            'type': 'file',
            'mode': '0644',
            'checksum_uncompressed': get_hexchecksum(data),
            'size_uncompressed': len(data),
            'checksum_compressed': get_hexchecksum(compressed),
            'size_compressed': len(compressed),
            'offset': offset,
        }
        file_id += 1
        offset += len(compressed)
        directory_stack[-1][1].append(file)
        compressed_data.append(compressed)
    return compressed_data
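
To make the bookkeeping concrete, here is a hypothetical call (paths, contents and the starting offset are made up for illustration):

    root = []
    blobs = compress_files({'a/b.txt': 'hello', 'c.txt': 'world'}, root, 100)
    # root now holds the TOC entries:
    # [{'id': 1, 'name': 'a', 'type': 'directory', 'mode': '0755',
    #   'children': [{'id': 2, 'name': 'b.txt', 'type': 'file',
    #                 'offset': 100, ...}]},
    #  {'id': 3, 'name': 'c.txt', 'type': 'file', ...}]
    # blobs contains the zlib-compressed contents in matching order.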


def create(archivepath, contents, keyfile):
    key = read_key(keyfile)
    checksum_length = len(get_checksum(''))
Sebastian Noack
2016/08/17 12:53:45
No need to hash any (empty) data to get the digest size.

Wladimir Palant
2016/08/17 14:11:40
Strictly speaking - no, it's not necessary. However, …
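    # Sketch of the alternative suggested above: PyCrypto exposes the digest
    # length as a constant, so no dummy hash would be needed:
    #
    #   checksum_length = SHA.digest_size  # 20 for SHA-1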
    params = {
        'certificates': read_certificates(keyfile),

        # Timestamp epoch starts at 2001-01-01T00:00:00.000Z
        'timestamp_numerical': time.time() - 978307200,
        'timestamp_iso': time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime()),

        'checksum': {
            'offset': 0,
            'size': checksum_length,
        },
        'signature': {
            'offset': checksum_length,
            'size': len(get_signature(key, '')),
        },
        'files': [],
    }

    offset = params['signature']['offset'] + params['signature']['size']
    compressed_data = compress_files(contents, params['files'], offset)

    template = getTemplate('xartoc.xml.tmpl', autoEscape=True)
    toc_uncompressed = template.render(params).encode('utf-8')
    toc_compressed = zlib.compress(toc_uncompressed, 9)

    with open(archivepath, 'wb') as file:
        # The file starts with a minimalistic header
        header = struct.pack('>IHHQQI', XAR_HEADER_MAGIC, XAR_HEADER_SIZE,
Sebastian Noack
2016/08/17 12:53:45
Note that you could avoid hard-coding the header size.

Wladimir Palant
2016/08/17 14:11:40
Done.
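                             # Sketch of the fix implied by "Done" above:
                             # derive the size from the format string instead
                             # of hard-coding XAR_HEADER_SIZE, e.g.
                             #   struct.calcsize('>IHHQQI') == 28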
                             XAR_VERSION, len(toc_compressed),
                             len(toc_uncompressed), XAR_CKSUM_SHA1)
        file.write(header)

        # It's followed by the compressed XML table of contents
        file.write(toc_compressed)

        # Then the actual data; all offsets are in the table of contents
        file.write(get_checksum(toc_compressed))
        file.write(get_signature(key, toc_compressed))
        for blob in compressed_data:
            file.write(blob)
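
For context, a hedged usage sketch of the module as a whole (archive name, file names and key path are illustrative; .safariextz packages are XAR archives, which is presumably why this module exists):

    # Hypothetical invocation: pack two files into a signed XAR archive.
    contents = {
        'Info.plist': '<plist/>',
        'Resources/script.js': 'console.log("hi");',
    }
    create('extension.safariextz', contents, 'key.pem')

The timestamp epoch constant checks out: from 1970-01-01 to 2001-01-01 there are 31 × 365 + 8 leap days = 11323 days, i.e. 11323 × 86400 = 978307200 seconds, the value subtracted from time.time().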