Coverage for torrentfile\hasher.py: 100%
261 statements
« prev ^ index » next coverage.py v7.3.0, created at 2023-08-27 21:50 -0700
« prev ^ index » next coverage.py v7.3.0, created at 2023-08-27 21:50 -0700
1#! /usr/bin/python3
2# -*- coding: utf-8 -*-
4##############################################################################
5# Copyright (C) 2021-current alexpdev
6#
7# Licensed under the Apache License, Version 2.0 (the "License");
8# you may not use this file except in compliance with the License.
9# You may obtain a copy of the License at
10#
11# http://www.apache.org/licenses/LICENSE-2.0
12#
13# Unless required by applicable law or agreed to in writing, software
14# distributed under the License is distributed on an "AS IS" BASIS,
15# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16# See the License for the specific language governing permissions and
17# limitations under the License.
18##############################################################################
19"""
20Piece/File Hashers for Bittorrent meta file contents.
21"""
23import os
24import logging
25from hashlib import sha1, sha256 # nosec
27from torrentfile.mixins import CbMixin, ProgMixin
28from torrentfile.utils import next_power_2
# Size in bytes of one merkle-tree leaf block read from disk (16 KiB).
BLOCK_SIZE = 1 << 14

# Length in bytes of a sha256 digest; also the size of one zero-filled
# padding hash used when a merkle layer has to be filled out.
HASH_SIZE = 32

logger = logging.getLogger(__name__)
class Hasher(CbMixin, ProgMixin):
    """
    Piece hasher for Bittorrent V1 files.

    Takes a sorted list of all file paths, calculates sha1 hash
    for fixed size pieces of file data from each file
    seamlessly until the last piece which may be smaller than others.

    The hasher is its own iterator: every ``next()`` call returns the
    sha1 digest of the next piece. The file currently being read is kept
    open in ``self.current`` and is closed once the last path has been
    consumed.

    Parameters
    ----------
    paths : list
        List of files.
    piece_length : int
        Size of chunks to split the data into.
    align: bool
        flag to indicate if the torrent should be piece aligned
    progress: int
        the progress mode
    progress_bar: [Optional] ProgressBar
        a progress bar object if progress mode is 2
    """

    def __init__(
        self,
        paths: list,
        piece_length: int,
        align: bool = False,
        progress: int = 1,
        progress_bar=None,
    ):
        """Generate hashes of piece length data from filelist contents."""
        self.piece_length = piece_length
        self.paths = paths
        self.align = align
        self.total = sum(os.path.getsize(i) for i in self.paths)
        self.index = 0
        self.current = open(self.paths[0], "rb")
        self.progress = progress
        self.progbar = progress_bar
        if self.progress == 1:
            # progress mode 1: a fresh tracker is created for every file
            file_size = os.path.getsize(self.paths[0])
            self.progbar = self.get_progress_tracker(file_size, self.paths[0])
        logger.debug("Hashing %s", str(self.paths[0]))

    def __iter__(self):
        """
        Iterate through feed pieces.

        Returns
        -------
        self : iterator
            Iterator for leaves/hash pieces.
        """
        return self

    def _handle_partial(self, arr: bytearray) -> bytes:
        """
        Define the handling of partial pieces that span 2 or more files.

        Parameters
        ----------
        arr : bytearray
            Incomplete piece containing partial data

        Returns
        -------
        digest : bytes
            SHA1 digest of the complete piece.
        """
        if self.align:
            # piece aligned: fill the rest of the piece with zero bytes
            # instead of borrowing data from the following file.
            arr.extend(bytearray(self.piece_length - len(arr)))
            return sha1(arr).digest()  # nosec

        # keep pulling data from the following files until the piece is
        # full or there are no files left.
        while len(arr) < self.piece_length and self.next_file():
            target = self.piece_length - len(arr)
            temp = bytearray(target)
            size = self.current.readinto(temp)
            self.progbar.update(size)
            arr.extend(temp[:size])
            if size == target:
                break
        return sha1(arr).digest()  # nosec

    def next_file(self) -> bool:
        """
        Seamlessly transition to next file in file list.

        When the list is exhausted the last open file handle is closed
        so that it does not leak.

        Returns
        -------
        bool:
            True if there is a next file otherwise False.
        """
        self.index += 1
        if self.progress == 1:
            self.progbar.close_out()
        if self.index < len(self.paths):
            path = self.paths[self.index]
            if self.progress == 1:
                total = os.path.getsize(path)
                self.progbar = self.get_progress_tracker(total, path)
            logger.debug("Hashing %s", str(path))
            self.current.close()
            self.current = open(path, "rb")
            return True
        # nothing left to read: release the handle of the final file.
        self.current.close()
        return False

    def __next__(self) -> bytes:
        """
        Generate piece-length pieces of data from input file list.

        Returns
        -------
        bytes
            SHA1 hash of the piece extracted.

        Raises
        ------
        StopIteration
            When every file in the list has been fully consumed.
        """
        # the handle is closed by `next_file` once the paths run out;
        # a closed handle therefore means the iterator is exhausted.
        if self.current.closed:
            raise StopIteration
        while True:
            piece = bytearray(self.piece_length)
            size = self.current.readinto(piece)
            self.progbar.update(size)
            if size == 0:
                # current file is finished, move on or stop
                if not self.next_file():
                    raise StopIteration
            elif size < self.piece_length:
                return self._handle_partial(piece[:size])
            else:
                return sha1(piece).digest()  # nosec
def merkle_root(blocks: list) -> bytes:
    """
    Reduce a layer of sha256 digests to a single merkle root hash.

    Neighbouring digests are concatenated and hashed pairwise, layer by
    layer, until only one digest is left. A digest without a partner at
    the end of a layer is not carried over to the next layer, so callers
    should pass a layer whose length is a power of two.

    Parameters
    ----------
    blocks : list
        a sequence of sha256 layer hashes.

    Returns
    -------
    bytes
        the sha256 root hash of the merkle tree; an empty `blocks` is
        returned unchanged.
    """
    if not blocks:
        return blocks
    layer = blocks
    while len(layer) > 1:
        # hash (0, 1), (2, 3), ... into the next, half sized layer
        layer = [
            sha256(layer[pos] + layer[pos + 1]).digest()
            for pos in range(0, len(layer) - 1, 2)
        ]
    return layer[0]
class HasherV2(CbMixin, ProgMixin):
    """
    Calculate the root hash and piece layers for file contents.

    Iterates over 16KiB blocks of data from given file, hashes the data,
    then creates a hash tree from the individual block hashes until size of
    hashed data equals the piece-length. Then continues the hash tree until
    root hash is calculated.

    All of the work happens while the instance is constructed; afterwards
    the results are available as `root`, `piece_layer` and `layer_hashes`.

    Parameters
    ----------
    path : str
        Path to file.
    piece_length : int
        Size of layer hashes pieces.
    progress: int
        the progress mode
    progress_bar: [Optional] ProgressBar
        a progress bar object if progress mode is 2
    """

    def __init__(
        self,
        path: str,
        piece_length: int,
        progress: int = 1,
        progress_bar=None,
    ):
        """
        Calculate and store hash information for specific file.
        """
        self.path = path
        self.root = None
        self.piece_layer = None
        self.layer_hashes = []
        self.piece_length = piece_length
        # number of 16KiB leaf blocks that make up one piece
        self.num_blocks = piece_length // BLOCK_SIZE
        self.progress = progress
        self.progbar = progress_bar
        if self.progress == 1:
            size = os.path.getsize(self.path)
            self.progbar = self.get_progress_tracker(size, self.path)
        with open(self.path, "rb") as fd:
            self.process_file(fd)

    def process_file(self, fd):
        """
        Calculate hashes over 16KiB chunks of file content.

        Every piece worth of blocks is reduced to one layer hash which is
        passed to `self.cb` and appended to `self.layer_hashes`. Once the
        file is exhausted the root hash is calculated.

        Parameters
        ----------
        fd : binary file object
            File opened in binary read mode; must support `readinto`.
        """
        # one reusable read buffer; the slice below copies the bytes read
        leaf = bytearray(BLOCK_SIZE)
        while True:
            blocks = []
            # generate leaves of merkle tree
            for _ in range(self.num_blocks):
                size = fd.readinto(leaf)
                if not size:
                    break
                self.progbar.update(size)
                blocks.append(sha256(leaf[:size]).digest())

            # blocks is empty means eof
            if not blocks:
                break
            if len(blocks) != self.num_blocks:
                # the final piece of the file is not completely filled
                remaining = self.num_blocks - len(blocks)
                if not self.layer_hashes:
                    # the whole file is smaller than a single piece:
                    # pad only up to next_power_2 of the block count
                    power2 = next_power_2(len(blocks))
                    remaining = power2 - len(blocks)

                # pad the rest with zeroes to fill remaining space.
                blocks.extend(bytes(HASH_SIZE) for _ in range(remaining))

            # calculate the root hash for the merkle tree up to piece-length
            layer_hash = merkle_root(blocks)
            self.cb(layer_hash)
            self.layer_hashes.append(layer_hash)
        if self.progress == 1:
            self.progbar.close_out()
        self._calculate_root()

    def _calculate_root(self):
        """
        Calculate root hash for the target file.

        Stores the concatenated layer hashes in `self.piece_layer`, then
        pads `self.layer_hashes` up to next_power_2 of its length with the
        hash of an all zero piece and stores the result in `self.root`.
        """
        self.piece_layer = b"".join(self.layer_hashes)
        hashes = len(self.layer_hashes)
        if hashes > 1:
            remainder = next_power_2(hashes) - hashes
            if remainder:
                # the padding hash is identical for every missing piece,
                # so it is computed once instead of once per padding slot
                pad_hash = merkle_root(
                    [bytes(HASH_SIZE) for _ in range(self.num_blocks)]
                )
                self.layer_hashes.extend(pad_hash for _ in range(remainder))
        self.root = merkle_root(self.layer_hashes)
class HasherHybrid(CbMixin, ProgMixin):
    """
    Calculate root and piece hashes for creating hybrid torrent file.

    Create merkle tree layers from sha256 hashed 16KiB blocks of contents.
    With a branching factor of 2, merge layer hashes until blocks equal
    piece_length bytes for the piece layer, and then the root hash.
    A sha1 digest of every piece is collected in `pieces` alongside the
    sha256 layer hashes.

    All of the work happens while the instance is constructed.

    Parameters
    ----------
    path : str
        path to target file.
    piece_length : int
        piece length for data chunks.
    progress: int
        the progress mode
    progress_bar: [Optional] ProgressBar
        a progress bar object if progress mode is 2
    """

    def __init__(
        self,
        path: str,
        piece_length: int,
        progress: int = 1,
        progress_bar=None,
    ):
        """
        Construct Hasher class instances for each file in torrent.
        """
        self.path = path
        self.piece_length = piece_length
        self.pieces = []
        self.layer_hashes = []
        self.piece_layer = None
        self.root = None
        self.padding_piece = None
        self.padding_file = None
        # number of 16KiB blocks that make up one piece
        self.amount = piece_length // BLOCK_SIZE
        self.progress = progress
        self.progbar = progress_bar
        if self.progress == 1:
            size = os.path.getsize(self.path)
            self.progbar = self.get_progress_tracker(size, self.path)
        with open(path, "rb") as data:
            self.process_file(data)

    def _pad_remaining(self, block_count: int):
        """
        Generate Hash sized, 0 filled bytes for padding.

        Parameters
        ----------
        block_count : int
            current total number of blocks collected.

        Returns
        -------
        padding : list
            Zero filled hashes to fill remaining portion of tree.
        """
        if not self.layer_hashes:
            # the file is smaller than one piece: only pad up to
            # next_power_2 of the number of blocks collected
            remaining = next_power_2(block_count) - block_count
        else:
            remaining = self.amount - block_count
        return [bytes(HASH_SIZE) for _ in range(remaining)]

    def process_file(self, data):
        """
        Calculate layer hashes for contents of file.

        For every piece the sha256 layer hash is passed to `self.cb` and
        appended to `self.layer_hashes`, and the sha1 digest of the piece
        is appended to `self.pieces`. A piece shorter than the piece
        length is zero padded for the sha1 digest and the matching
        padding file entry is stored in `self.padding_file`.

        Parameters
        ----------
        data : binary file object
            File opened in binary read mode; must support `readinto`.
        """
        block = bytearray(BLOCK_SIZE)
        while True:
            # bytes still missing to complete the current piece
            plength = self.piece_length
            blocks = []
            piece = sha1()  # nosec
            for _ in range(self.amount):
                size = data.readinto(block)
                self.progbar.update(size)
                if not size:
                    break
                plength -= size
                chunk = block[:size]
                blocks.append(sha256(chunk).digest())
                piece.update(chunk)
            if not blocks:
                break
            if len(blocks) != self.amount:
                blocks.extend(self._pad_remaining(len(blocks)))
            layer_hash = merkle_root(blocks)
            self.cb(layer_hash)
            self.layer_hashes.append(layer_hash)
            if plength > 0:
                self.padding_file = {
                    "attr": "p",
                    "length": plength,
                    "path": [".pad", str(plength)],
                }
                piece.update(bytes(plength))
            self.pieces.append(piece.digest())  # nosec
        if self.progress == 1:
            self.progbar.close_out()
        self._calculate_root()

    def _calculate_root(self):
        """
        Calculate the root hash for opened file.

        **DEPRECATED**

        Stores the concatenated layer hashes in `self.piece_layer`, pads
        `self.layer_hashes` up to next_power_2 of its length with the
        hash of an all zero piece and stores the result in `self.root`.
        """
        self.piece_layer = b"".join(self.layer_hashes)

        if len(self.layer_hashes) > 1:
            pad_piece = merkle_root(
                [bytes(HASH_SIZE) for _ in range(self.amount)]
            )

            pow2 = next_power_2(len(self.layer_hashes))
            remainder = pow2 - len(self.layer_hashes)

            self.layer_hashes += [pad_piece for _ in range(remainder)]
        self.root = merkle_root(self.layer_hashes)
class FileHasher(CbMixin, ProgMixin):
    """
    Calculate root and piece hashes for a file, optionally hybrid.

    Create merkle tree layers from sha256 hashed 16KiB blocks of contents.
    With a branching factor of 2, merge layer hashes until blocks equal
    piece_length bytes for the piece layer, and then the root hash.

    The hasher is its own iterator: every ``next()`` call processes one
    piece of the file. The file is opened by the constructor and closed
    when the root hash is calculated at the end of the iteration.

    Parameters
    ----------
    path : str
        path to target file.
    piece_length : int
        piece length for data chunks.
    hybrid : bool
        flag to indicate if it's a hybrid torrent
    progress: int
        the progress mode
    progress_bar: [Optional] ProgressBar
        a progress bar object if progress mode is 2
    """

    def __init__(
        self,
        path: str,
        piece_length: int,
        progress: int = 1,
        hybrid: bool = False,
        progress_bar=None,
    ):
        """
        Construct Hasher class instances for each file in torrent.
        """
        self.path = path
        self.piece_length = piece_length
        self.pieces = []
        self.layer_hashes = []
        self.piece_layer = None
        self.root = None
        self.padding_piece = None
        self.padding_file = None
        # number of 16KiB blocks that make up one piece
        self.amount = piece_length // BLOCK_SIZE
        # set once a read returned no data, i.e. end of file was reached
        self.end = False
        self.progress = progress
        self.progbar = progress_bar
        if self.progress == 1:
            size = os.path.getsize(self.path)
            self.progbar = self.get_progress_tracker(size, self.path)
        self.current = open(path, "rb")
        self.hybrid = hybrid

    def __iter__(self):
        """Return `self`: needed to implement iterator implementation."""
        return self

    def _pad_remaining(self, block_count: int):
        """
        Generate Hash sized, 0 filled bytes for padding.

        Parameters
        ----------
        block_count : int
            current total number of blocks collected.

        Returns
        -------
        padding : list
            Zero filled hashes to fill remaining portion of tree.
        """
        if not self.layer_hashes:
            # the file is smaller than one piece: only pad up to
            # next_power_2 of the number of blocks collected
            remaining = next_power_2(block_count) - block_count
        else:
            remaining = self.amount - block_count
        return [bytes(HASH_SIZE) for _ in range(remaining)]

    def __next__(self) -> bytes:
        """
        Calculate layer hashes for contents of file.

        Returns
        -------
        bytes
            The layer merkle root hash. When `hybrid` is set a tuple of
            the layer hash and the sha1 digest of the piece is returned
            instead.

        Raises
        ------
        StopIteration
            Halts the iterator from progressing
        """
        # `_calculate_root` closes the file, so a closed handle means the
        # iterator is exhausted; reading from it again would raise
        # ValueError instead of StopIteration.
        if self.end or self.current.closed:
            self.end = False
            raise StopIteration
        # bytes still missing to complete the current piece
        plength = self.piece_length
        blocks = []
        piece = sha1()  # nosec
        block = bytearray(BLOCK_SIZE)
        for _ in range(self.amount):
            size = self.current.readinto(block)
            self.progbar.update(size)
            if not size:
                self.end = True
                break
            plength -= size
            chunk = block[:size]
            blocks.append(sha256(chunk).digest())
            if self.hybrid:
                piece.update(chunk)
        if not blocks:
            # end of file reached exactly on a piece boundary (or the
            # file is empty): finish up the same way as the partial
            # piece path below does.
            if self.progress == 1:
                self.progbar.close_out()
            self._calculate_root()
            raise StopIteration
        if len(blocks) != self.amount:
            blocks.extend(self._pad_remaining(len(blocks)))
        layer_hash = merkle_root(blocks)
        self.layer_hashes.append(layer_hash)
        self.cb(layer_hash)
        if self.end:
            if self.progress == 1:
                self.progbar.close_out()
            self._calculate_root()
        if self.hybrid:
            if plength > 0:
                self.padding_file = {
                    "attr": "p",
                    "length": plength,
                    "path": [".pad", str(plength)],
                }
                piece.update(bytes(plength))
            piece = piece.digest()
            self.pieces.append(piece)
            return layer_hash, piece
        return layer_hash

    def _calculate_root(self):
        """
        Calculate the root hash for opened file.

        Stores the concatenated layer hashes in `self.piece_layer`, pads
        `self.layer_hashes` up to next_power_2 of its length with the
        hash of an all zero piece, stores the result in `self.root` and
        closes the file.
        """
        self.piece_layer = b"".join(self.layer_hashes)

        if len(self.layer_hashes) > 1:
            pad_piece = merkle_root(
                [bytes(HASH_SIZE) for _ in range(self.amount)]
            )

            pow2 = next_power_2(len(self.layer_hashes))
            remainder = pow2 - len(self.layer_hashes)

            self.layer_hashes += [pad_piece for _ in range(remainder)]
        self.root = merkle_root(self.layer_hashes)
        self.current.close()