Skip to content

Hasher

hasher #

Piece/File Hashers for Bittorrent meta file contents.

FileHasher(path: str, piece_length: int, progress: int = 1, hybrid: bool = False, progress_bar = None) #

Bases: CbMixin, ProgMixin

Calculate root and piece hashes for creating hybrid torrent file.

Create merkle tree layers from sha256 hashed 16KiB blocks of contents. With a branching factor of 2, merge layer hashes until blocks equal piece_length bytes for the piece layer, and then the root hash.

PARAMETER DESCRIPTION
path

path to target file.

TYPE: str

piece_length

piece length for data chunks.

TYPE: int

hybrid

flag to indicate if it’s a hybrid torrent

TYPE: bool DEFAULT: False

progress

the progress mode

TYPE: int DEFAULT: 1

progress_bar

a progress bar object if progress mode is 2

DEFAULT: None

Construct Hasher class instances for each file in torrent.

Source code in torrentfile\hasher.py
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
def __init__(
    self,
    path: str,
    piece_length: int,
    progress: int = 1,
    hybrid: bool = False,
    progress_bar=None,
):
    """
    Set up per-file hashing state and open the target file for reading.

    Parameters
    ----------
    path : str
        Path of the file whose contents will be hashed.
    piece_length : int
        Piece length in bytes; divided by ``BLOCK_SIZE`` to get the number
        of blocks read for each piece.
    progress : int
        Progress mode. When it equals 1 a tracker sized to the file is
        requested from ``get_progress_tracker`` and used in place of
        `progress_bar`.
    hybrid : bool
        Stored on the instance; when true, iteration also produces SHA1
        piece hashes.
    progress_bar : object, optional
        Progress bar kept as-is whenever `progress` is not 1.
    """
    # What to hash and how it is chunked.
    self.path, self.piece_length = path, piece_length
    self.amount = piece_length // BLOCK_SIZE

    # Results accumulated while iterating.
    self.pieces, self.layer_hashes = [], []
    self.piece_layer = self.root = None
    self.padding_piece = self.padding_file = None

    # Set once a read hits end-of-file.
    self.end = False

    self.progress, self.progbar = progress, progress_bar
    if progress == 1:
        file_size = os.path.getsize(path)
        self.progbar = self.get_progress_tracker(file_size, path)

    self.hybrid = hybrid
    # The handle stays open because blocks are read lazily in __next__.
    self.current = open(path, "rb")

__iter__() #

Return self, as required by the iterator protocol.

Source code in torrentfile\hasher.py
475
476
477
def __iter__(self):
    """Return `self`; the object serves as its own iterator."""
    return self

__next__() -> bytes #

Calculate layer hashes for contents of file.

RETURNS DESCRIPTION
bytes

The layer merkle root hash.

RAISES DESCRIPTION
StopIteration

Halts the iterator from progressing

Source code in torrentfile\hasher.py
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
def __next__(self) -> bytes:
    """
    Hash the next piece-length span of the file.

    Reads up to ``self.amount`` blocks of ``BLOCK_SIZE`` bytes, hashes each
    with SHA256, pads the block list when the piece is short, and reduces
    it to a single layer hash with ``merkle_root``.

    Returns
    -------
    bytes
        The merkle root (layer hash) of the piece. In hybrid mode a
        ``(layer_hash, sha1_piece)`` tuple is returned instead.

    Raises
    ------
    StopIteration
        When end-of-file was reached on the previous call, or when no data
        could be read on this call.
    """
    if self.end:
        # EOF was seen while producing the previous piece: reset and stop.
        self.end = False
        raise StopIteration

    sha1_piece = sha1()  # nosec
    leaves = []
    consumed = 0
    buffer = bytearray(BLOCK_SIZE)
    for _ in range(self.amount):
        nread = self.current.readinto(buffer)
        self.progbar.update(nread)
        if not nread:
            self.end = True
            break
        consumed += nread
        chunk = buffer[:nread]
        leaves.append(sha256(chunk).digest())
        if self.hybrid:
            sha1_piece.update(chunk)

    # Bytes missing from a full piece; positive only for a short piece.
    shortfall = self.piece_length - consumed

    if not leaves:
        # NOTE(review): the progress bar is not closed on this path, unlike
        # the end-of-file path below - confirm whether that is intended.
        self._calculate_root()
        raise StopIteration

    if len(leaves) != self.amount:
        leaves.extend(self._pad_remaining(len(leaves)))

    layer_hash = merkle_root(leaves)
    self.layer_hashes.append(layer_hash)
    self.cb(layer_hash)

    if self.end:
        if self.progress == 1:
            self.progbar.close_out()
        self._calculate_root()

    if not self.hybrid:
        return layer_hash

    if shortfall > 0:
        # Record a pad-file entry and zero-fill the SHA1 piece to full size.
        self.padding_file = {
            "attr": "p",
            "length": shortfall,
            "path": [".pad", str(shortfall)],
        }
        sha1_piece.update(bytes(shortfall))
    piece_digest = sha1_piece.digest()
    self.pieces.append(piece_digest)
    return layer_hash, piece_digest

Hasher(paths: list, piece_length: int, align: bool = False, progress: int = 1, progress_bar = None) #

Bases: CbMixin, ProgMixin

Piece hasher for Bittorrent V1 files.

Takes a sorted list of all file paths and calculates the sha1 hash for fixed-size pieces of file data, reading seamlessly across file boundaries until the last piece, which may be smaller than the others.

PARAMETER DESCRIPTION
paths

List of files.

TYPE: list

piece_length

Size of chunks to split the data into.

TYPE: int

align

flag to indicate if the torrent should be piece aligned

TYPE: bool DEFAULT: False

progress

the progress mode

TYPE: int DEFAULT: 1

progress_bar

a progress bar object if progress mode is 2

DEFAULT: None

Generate hashes of piece length data from filelist contents.

Source code in torrentfile\hasher.py
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
def __init__(
    self,
    paths: list,
    piece_length: int,
    align: bool = False,
    progress: int = 1,
    progress_bar=None,
):
    """
    Prepare to hash piece-length chunks drawn from a list of files.

    Parameters
    ----------
    paths : list
        File paths to hash; the first one is opened immediately.
    piece_length : int
        Number of bytes in each piece.
    align : bool
        Stored on the instance as the piece-alignment flag.
    progress : int
        Progress mode. When it equals 1 a tracker sized to the first file
        is requested from ``get_progress_tracker``.
    progress_bar : object, optional
        Progress bar kept as-is whenever `progress` is not 1.
    """
    self.paths, self.piece_length, self.align = paths, piece_length, align
    # Combined size in bytes of every file in the list.
    self.total = sum(map(os.path.getsize, paths))
    self.index = 0

    first = paths[0]
    self.current = open(first, "rb")

    self.progress, self.progbar = progress, progress_bar
    if progress == 1:
        first_size = os.path.getsize(first)
        self.progbar = self.get_progress_tracker(first_size, first)
    logger.debug("Hashing %s", str(first))

__iter__() #

Iterate through feed pieces.

RETURNS DESCRIPTION
self

Iterator for leaves/hash pieces.

TYPE: iterator

Source code in torrentfile\hasher.py
80
81
82
83
84
85
86
87
88
89
def __iter__(self):
    """
    Return the hasher itself so it can be used directly in a ``for`` loop.

    Returns
    -------
    self : iterator
        This object, which yields piece hashes through ``__next__``.
    """
    return self

__next__() -> bytes #

Generate piece-length pieces of data from input file list.

RETURNS DESCRIPTION
bytes

SHA1 hash of the piece extracted.

Source code in torrentfile\hasher.py
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
def __next__(self) -> bytes:
    """
    Produce the SHA1 hash of the next piece of data.

    Returns
    -------
    bytes
        SHA1 digest of a full piece, or whatever ``_handle_partial``
        returns when the current file ends before the piece is full.

    Raises
    ------
    StopIteration
        When the current file is exhausted and ``next_file`` reports that
        no further file is available.
    """
    while True:
        buffer = bytearray(self.piece_length)
        nread = self.current.readinto(buffer)
        self.progbar.update(nread)

        if nread == 0:
            # Current file is drained: move on, or stop if it was the last.
            if self.next_file():
                continue
            raise StopIteration

        if nread < self.piece_length:
            # Short read: hand the bytes obtained so far to the helper.
            return self._handle_partial(buffer[:nread])

        return sha1(buffer).digest()  # nosec

next_file() -> bool #

Seamlessly transition to the next file in the file list.

RETURNS DESCRIPTION
bool

True if there is a next file otherwise False.

TYPE: bool

Source code in torrentfile\hasher.py
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
def next_file(self) -> bool:
    """
    Advance to the following file in ``self.paths``.

    The current progress tracker is closed when the progress mode is 1.
    If another file exists, a new tracker is created for it (mode 1 only),
    the old handle is closed and the new file is opened in its place.

    Returns
    -------
    bool:
        True if a next file was opened, False if the list is exhausted.
    """
    self.index += 1
    if self.progress == 1:
        self.progbar.close_out()

    if self.index >= len(self.paths):
        return False

    upcoming = self.paths[self.index]
    if self.progress == 1:
        size = os.path.getsize(upcoming)
        self.progbar = self.get_progress_tracker(size, upcoming)
    logger.debug("Hashing %s", str(upcoming))

    # Swap the exhausted handle for one on the new file.
    self.current.close()
    self.current = open(upcoming, "rb")
    return True

HasherHybrid(path: str, piece_length: int, progress: int = 1, progress_bar = None) #

Bases: CbMixin, ProgMixin

Calculate root and piece hashes for creating hybrid torrent file.

Create merkle tree layers from sha256 hashed 16KiB blocks of contents. With a branching factor of 2, merge layer hashes until blocks equal piece_length bytes for the piece layer, and then the root hash.

PARAMETER DESCRIPTION
path

path to target file.

TYPE: str

piece_length

piece length for data chunks.

TYPE: int

progress

the progress mode

TYPE: int DEFAULT: 1

progress_bar

a progress bar object if progress mode is 2

DEFAULT: None

Construct Hasher class instances for each file in torrent.

Source code in torrentfile\hasher.py
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
def __init__(
    self,
    path: str,
    piece_length: int,
    progress: int = 1,
    progress_bar=None,
):
    """
    Initialise hashing state and immediately hash the whole file.

    Parameters
    ----------
    path : str
        Path of the file whose contents will be hashed.
    piece_length : int
        Piece length in bytes; divided by ``BLOCK_SIZE`` to get the number
        of blocks read for each piece.
    progress : int
        Progress mode. When it equals 1 a tracker sized to the file is
        requested from ``get_progress_tracker``.
    progress_bar : object, optional
        Progress bar kept as-is whenever `progress` is not 1.
    """
    # What to hash and how it is chunked.
    self.path, self.piece_length = path, piece_length
    self.amount = piece_length // BLOCK_SIZE

    # Results filled in by process_file.
    self.pieces, self.layer_hashes = [], []
    self.piece_layer = self.root = None
    self.padding_piece = self.padding_file = None

    self.progress, self.progbar = progress, progress_bar
    if progress == 1:
        file_size = os.path.getsize(path)
        self.progbar = self.get_progress_tracker(file_size, path)

    # Hashing happens eagerly; the handle is closed as soon as it finishes.
    with open(path, "rb") as stream:
        self.process_file(stream)

process_file(data: bytearray) #

Calculate layer hashes for contents of file.

PARAMETER DESCRIPTION
data

File opened in read mode.

TYPE: BytesIO

Source code in torrentfile\hasher.py
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
def process_file(self, data: bytearray):
    """
    Compute the v2 layer hashes and v1 piece hashes for an open file.

    For each piece-length span, every ``BLOCK_SIZE`` block is hashed with
    SHA256 and the block hashes are reduced with ``merkle_root``; the same
    bytes feed a SHA1 hash that becomes the v1 piece. A short final piece
    is zero-filled for SHA1 and a pad-file entry is recorded.

    Parameters
    ----------
    data : BytesIO
        Binary stream supporting ``readinto``; the body uses it as an open
        file object, despite the ``bytearray`` annotation.
    """
    while True:
        sha1_piece = sha1()  # nosec
        leaves = []
        consumed = 0
        buffer = bytearray(BLOCK_SIZE)
        for _ in range(self.amount):
            nread = data.readinto(buffer)
            self.progbar.update(nread)
            if not nread:
                break
            consumed += nread
            chunk = buffer[:nread]
            leaves.append(sha256(chunk).digest())
            sha1_piece.update(chunk)

        # Nothing was read for this piece: the file is exhausted.
        if not leaves:
            break

        if len(leaves) != self.amount:
            leaves.extend(self._pad_remaining(len(leaves)))

        layer_hash = merkle_root(leaves)
        self.cb(layer_hash)
        self.layer_hashes.append(layer_hash)

        # Bytes missing from a full piece; positive only for a short piece.
        shortfall = self.piece_length - consumed
        if shortfall > 0:
            self.padding_file = {
                "attr": "p",
                "length": shortfall,
                "path": [".pad", str(shortfall)],
            }
            sha1_piece.update(bytes(shortfall))
        self.pieces.append(sha1_piece.digest())  # nosec

    if self.progress == 1:
        self.progbar.close_out()
    self._calculate_root()

HasherV2(path: str, piece_length: int, progress: int = 1, progress_bar = None) #

Bases: CbMixin, ProgMixin

Calculate the root hash and piece layers for file contents.

Iterates over 16KiB blocks of data from given file, hashes the data, then creates a hash tree from the individual block hashes until size of hashed data equals the piece-length. Then continues the hash tree until root hash is calculated.

PARAMETER DESCRIPTION
path

Path to file.

TYPE: str

piece_length

Size of layer hashes pieces.

TYPE: int

progress

the progress mode

TYPE: int DEFAULT: 1

progress_bar

a progress bar object if progress mode is 2

DEFAULT: None

Calculate and store hash information for specific file.

Source code in torrentfile\hasher.py
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
def __init__(
    self,
    path: str,
    piece_length: int,
    progress: int = 1,
    progress_bar=None,
):
    """
    Initialise hashing state and immediately hash the whole file.

    Parameters
    ----------
    path : str
        Path of the file whose contents will be hashed.
    piece_length : int
        Piece length in bytes; divided by ``BLOCK_SIZE`` to get the number
        of blocks per piece.
    progress : int
        Progress mode. When it equals 1 a tracker sized to the file is
        requested from ``get_progress_tracker``.
    progress_bar : object, optional
        Progress bar kept as-is whenever `progress` is not 1.
    """
    self.path, self.piece_length = path, piece_length
    self.num_blocks = piece_length // BLOCK_SIZE

    # Results filled in by process_file.
    self.root = self.piece_layer = None
    self.layer_hashes = []

    self.progress, self.progbar = progress, progress_bar
    if progress == 1:
        file_size = os.path.getsize(path)
        self.progbar = self.get_progress_tracker(file_size, path)

    # Hashing happens eagerly; the handle is closed as soon as it finishes.
    with open(path, "rb") as stream:
        self.process_file(stream)

process_file(fd: str) #

Calculate hashes over 16KiB chunks of file content.

PARAMETER DESCRIPTION
fd

Opened file in read mode.

TYPE: BytesIO

Source code in torrentfile\hasher.py
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
def process_file(self, fd: str):
    """
    Compute the layer hashes for an open file, one piece at a time.

    Each ``BLOCK_SIZE`` block is hashed with SHA256; the block hashes of a
    piece are padded with zero hashes where needed and reduced to a single
    layer hash with ``merkle_root``.

    Parameters
    ----------
    fd : BytesIO
        Binary stream supporting ``readinto``; the body uses it as an open
        file object, despite the ``str`` annotation.
    """
    while True:
        leaves = []
        buffer = bytearray(BLOCK_SIZE)

        # Collect the leaf hashes belonging to one piece.
        for _ in range(self.num_blocks):
            nread = fd.readinto(buffer)
            if not nread:
                break
            self.progbar.update(nread)
            leaves.append(sha256(buffer[:nread]).digest())

        # No leaves at all means end of file.
        if not leaves:
            break

        count = len(leaves)
        if count != self.num_blocks:
            if self.layer_hashes:
                # Short last piece of a multi-piece file: fill the piece.
                missing = self.num_blocks - count
            else:
                # No layer hash yet, so the whole file fits in this piece:
                # pad only up to the next power of two.
                missing = next_power_2(count) - count
            # Padding leaves are all-zero 32-byte hashes.
            leaves.extend([bytes(32)] * missing)

        # Reduce the piece's leaves to one hash.
        layer_hash = merkle_root(leaves)
        self.cb(layer_hash)
        self.layer_hashes.append(layer_hash)

    if self.progress == 1:
        self.progbar.close_out()
    self._calculate_root()

merkle_root(blocks: list) -> bytes #

Calculate the merkle root for a seq of sha256 hash digests.

PARAMETER DESCRIPTION
blocks

a sequence of sha256 layer hashes.

TYPE: list

RETURNS DESCRIPTION
bytes

the sha256 root hash of the merkle tree.

Source code in torrentfile\hasher.py
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
def merkle_root(blocks: list) -> bytes:
    """
    Reduce a sequence of sha256 digests to a single merkle root.

    Adjacent digests are concatenated pairwise and re-hashed, layer by
    layer, until one digest remains. A trailing unpaired digest in a layer
    is dropped, so callers are expected to pass a power-of-two count.

    Parameters
    ----------
    blocks : list
        a sequence of sha256 layer hashes.

    Returns
    -------
    bytes
        the sha256 root hash of the merkle tree; an empty `blocks` is
        returned unchanged.
    """
    if not blocks:
        return blocks

    layer = blocks
    while len(layer) > 1:
        # Pair even-indexed digests with their odd-indexed neighbours.
        pairs = zip(layer[0::2], layer[1::2])
        layer = [sha256(left + right).digest() for left, right in pairs]
    return layer[0]