Coverage for torrentfile\hasher.py: 100%

261 statements  

« prev     ^ index     » next       coverage.py v7.3.0, created at 2023-08-27 21:50 -0700

1#! /usr/bin/python3 

2# -*- coding: utf-8 -*- 

3 

4############################################################################## 

5# Copyright (C) 2021-current alexpdev 

6# 

7# Licensed under the Apache License, Version 2.0 (the "License"); 

8# you may not use this file except in compliance with the License. 

9# You may obtain a copy of the License at 

10# 

11# http://www.apache.org/licenses/LICENSE-2.0 

12# 

13# Unless required by applicable law or agreed to in writing, software 

14# distributed under the License is distributed on an "AS IS" BASIS, 

15# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

16# See the License for the specific language governing permissions and 

17# limitations under the License. 

18############################################################################## 

19""" 

20Piece/File Hashers for Bittorrent meta file contents. 

21""" 

22 

23import os 

24import logging 

25from hashlib import sha1, sha256 # nosec 

26 

27from torrentfile.mixins import CbMixin, ProgMixin 

28from torrentfile.utils import next_power_2 

29 

# Size in bytes of the read buffer used for each merkle-tree leaf block.
BLOCK_SIZE = 2**14  # 16KiB
# Length in bytes of each zero-filled padding hash (same as a sha256 digest).
HASH_SIZE = 32

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

34 

35 

class Hasher(CbMixin, ProgMixin):
    """
    Piece hasher for Bittorrent V1 files.

    Takes a sorted list of all file paths, calculates sha1 hash
    for fixed size pieces of file data from each file
    seamlessly until the last piece which may be smaller than others.

    The instance is its own iterator; each step yields one sha1 digest.
    Every file is closed as soon as it has been fully read, including the
    last one, so an exhausted hasher holds no open file handle.

    Parameters
    ----------
    paths : list
        List of files.
    piece_length : int
        Size of chunks to split the data into.
    align: bool
        flag to indicate if the torrent should be piece aligned
    progress: int
        the progress mode
    progress_bar: [Optional] ProgressBar
        a progress bar object if progress mode is 2
    """

    def __init__(
        self,
        paths: list,
        piece_length: int,
        align: bool = False,
        progress: int = 1,
        progress_bar=None,
    ):
        """Generate hashes of piece length data from filelist contents."""
        self.piece_length = piece_length
        self.paths = paths
        self.align = align
        # combined size in bytes of every file in the list
        self.total = sum(os.path.getsize(i) for i in self.paths)
        # position in `paths` of the file currently being read
        self.index = 0
        self.current = open(self.paths[0], "rb")
        self.progress = progress
        self.progbar = progress_bar
        if self.progress == 1:
            # progress mode 1 creates a fresh tracker for every file
            file_size = os.path.getsize(self.paths[0])
            self.progbar = self.get_progress_tracker(file_size, self.paths[0])
        logger.debug("Hashing %s", str(self.paths[0]))

    def __iter__(self):
        """
        Iterate through feed pieces.

        Returns
        -------
        self : iterator
            Iterator for leaves/hash pieces.
        """
        return self

    def _handle_partial(self, arr: bytearray) -> bytes:
        """
        Define the handling of partial pieces that span 2 or more files.

        Parameters
        ----------
        arr : bytearray
            Incomplete piece containing partial data

        Returns
        -------
        digest : bytes
            SHA1 digest of the complete piece.
        """
        if self.align:
            # piece aligned: fill the rest of the piece with zero bytes
            # instead of borrowing data from the following file.
            arr.extend(bytearray(self.piece_length - len(arr)))
            return sha1(arr).digest()  # nosec

        # Keep pulling data from the following files until the piece is
        # full or there are no files left.  The length test is evaluated
        # first, so `next_file` is never called once the piece is complete.
        while len(arr) < self.piece_length and self.next_file():
            target = self.piece_length - len(arr)
            temp = bytearray(target)
            size = self.current.readinto(temp)
            self.progbar.update(size)
            arr.extend(temp[:size])
        return sha1(arr).digest()  # nosec

    def next_file(self) -> bool:
        """
        Seamlessly transition to next file in file list.

        The file that was just exhausted is always closed, even when it
        was the last one in the list.

        Returns
        -------
        bool:
            True if there is a next file otherwise False.
        """
        self.index += 1
        if self.progress == 1:
            self.progbar.close_out()
        # release the exhausted handle before deciding whether to continue
        self.current.close()
        if self.index >= len(self.paths):
            return False
        path = self.paths[self.index]
        if self.progress == 1:
            total = os.path.getsize(path)
            self.progbar = self.get_progress_tracker(total, path)
        logger.debug("Hashing %s", str(path))
        self.current = open(path, "rb")
        return True

    def __next__(self) -> bytes:
        """
        Generate piece-length pieces of data from input file list.

        Returns
        -------
        bytes
            SHA1 hash of the piece extracted.

        Raises
        ------
        StopIteration
            When every file has been read; raised again on later calls.
        """
        # `current` is only left closed once the file list is exhausted
        if self.current.closed:
            raise StopIteration
        while True:
            piece = bytearray(self.piece_length)
            size = self.current.readinto(piece)
            self.progbar.update(size)
            if size == 0:
                # current file is finished; move on or stop
                if not self.next_file():
                    raise StopIteration
            elif size < self.piece_length:
                return self._handle_partial(piece[:size])
            else:
                return sha1(piece).digest()  # nosec

164 

165 

def merkle_root(blocks: list) -> bytes:
    """
    Reduce a sequence of sha256 digests to a single merkle root.

    Adjacent hashes are concatenated and hashed pairwise, layer by layer,
    until one hash remains.

    Parameters
    ----------
    blocks : list
        a sequence of sha256 layer hashes.

    Returns
    -------
    bytes
        the sha256 root hash of the merkle tree.  An empty input is
        returned unchanged.

    Notes
    -----
    A layer with an odd number of hashes loses its last, unpaired hash,
    so callers are expected to pad the input to a power of two.
    """
    if not blocks:
        return blocks
    layer = blocks
    while len(layer) > 1:
        # pair element 2k with element 2k + 1; a trailing single is skipped
        layer = [
            sha256(layer[pos] + layer[pos + 1]).digest()
            for pos in range(0, len(layer) - 1, 2)
        ]
    return layer[0]

187 

188 

class HasherV2(CbMixin, ProgMixin):
    """
    Calculate the root hash and piece layers for file contents.

    Iterates over 16KiB blocks of data from given file, hashes the data,
    then creates a hash tree from the individual block hashes until size of
    hashed data equals the piece-length. Then continues the hash tree until
    root hash is calculated.

    All hashing happens inside the constructor; afterwards the results are
    available as the `root`, `piece_layer` and `layer_hashes` attributes.

    Parameters
    ----------
    path : str
        Path to file.
    piece_length : int
        Size of layer hashes pieces.
    progress: int
        the progress mode
    progress_bar: [Optional] ProgressBar
        a progress bar object if progress mode is 2
    """

    def __init__(
        self,
        path: str,
        piece_length: int,
        progress: int = 1,
        progress_bar=None,
    ):
        """
        Calculate and store hash information for specific file.
        """
        self.path = path
        self.root = None
        self.piece_layer = None
        self.layer_hashes = []
        self.piece_length = piece_length
        # number of 16KiB leaf blocks that make up one piece
        self.num_blocks = piece_length // BLOCK_SIZE
        self.progress = progress
        self.progbar = progress_bar
        if self.progress == 1:
            size = os.path.getsize(self.path)
            self.progbar = self.get_progress_tracker(size, self.path)
        with open(self.path, "rb") as fd:
            self.process_file(fd)

    def process_file(self, fd):
        """
        Calculate hashes over 16KiB chunks of file content.

        Parameters
        ----------
        fd : BytesIO
            Opened file in binary read mode.
        """
        # One buffer is reused for every read; each block is sliced to the
        # number of bytes actually read before it is hashed.
        leaf = bytearray(BLOCK_SIZE)
        while True:
            # generate leaves of merkle tree for one piece
            blocks = []
            for _ in range(self.num_blocks):
                size = fd.readinto(leaf)
                if not size:
                    break
                self.progbar.update(size)
                blocks.append(sha256(leaf[:size]).digest())

            # blocks is empty means eof
            if not blocks:
                break
            if len(blocks) != self.num_blocks:
                if self.layer_hashes:
                    # short final piece of a multi-piece file:
                    # pad up to a full piece worth of blocks
                    remaining = self.num_blocks - len(blocks)
                else:
                    # the whole file fits in a single piece:
                    # pad only up to the next power of two
                    remaining = next_power_2(len(blocks)) - len(blocks)
                # pad the rest with zeroes to fill remaining space.
                blocks.extend(bytes(HASH_SIZE) for _ in range(remaining))

            # calculate the root hash for the merkle tree up to piece-length
            layer_hash = merkle_root(blocks)
            self.cb(layer_hash)
            self.layer_hashes.append(layer_hash)
        if self.progress == 1:
            self.progbar.close_out()
        self._calculate_root()

    def _calculate_root(self):
        """
        Calculate root hash for the target file.

        Stores the concatenated piece hashes in `piece_layer`, pads
        `layer_hashes` in place up to a power of two and stores the
        resulting merkle root in `root`.
        """
        self.piece_layer = b"".join(self.layer_hashes)
        hashes = len(self.layer_hashes)
        if hashes > 1:
            remainder = next_power_2(hashes) - hashes
            if remainder:
                # root of a piece made only of zero hashes; it is the same
                # for every padding slot so it is computed a single time.
                pad_piece = merkle_root(
                    [bytes(HASH_SIZE) for _ in range(self.num_blocks)]
                )
                self.layer_hashes.extend(pad_piece for _ in range(remainder))
        self.root = merkle_root(self.layer_hashes)

292 

293 

class HasherHybrid(CbMixin, ProgMixin):
    """
    Calculate root and piece hashes for creating hybrid torrent file.

    Create merkle tree layers from sha256 hashed 16KiB blocks of contents.
    With a branching factor of 2, merge layer hashes until blocks equal
    piece_length bytes for the piece layer, and then the root hash.
    A sha1 digest of every piece is collected alongside in `pieces`.

    All hashing happens inside the constructor.

    Parameters
    ----------
    path : str
        path to target file.
    piece_length : int
        piece length for data chunks.
    progress: int
        the progress mode
    progress_bar: [Optional] ProgressBar
        a progress bar object if progress mode is 2
    """

    def __init__(
        self,
        path: str,
        piece_length: int,
        progress: int = 1,
        progress_bar=None,
    ):
        """
        Construct Hasher class instances for each file in torrent.
        """
        self.path = path
        self.piece_length = piece_length
        self.pieces = []
        self.layer_hashes = []
        self.piece_layer = None
        self.root = None
        self.padding_piece = None
        self.padding_file = None
        # number of 16KiB blocks that make up one piece
        self.amount = piece_length // BLOCK_SIZE
        self.progress = progress
        self.progbar = progress_bar
        if self.progress == 1:
            size = os.path.getsize(self.path)
            self.progbar = self.get_progress_tracker(size, self.path)
        with open(path, "rb") as data:
            self.process_file(data)

    def _pad_remaining(self, block_count: int):
        """
        Generate Hash sized, 0 filled bytes for padding.

        Parameters
        ----------
        block_count : int
            current total number of blocks collected.

        Returns
        -------
        padding : list
            Zero-filled hashes to fill remaining portion of tree.
        """
        if self.layer_hashes:
            # short final piece of a multi-piece file: fill a whole piece
            remaining = self.amount - block_count
        else:
            # the whole file fits in one piece: next power of two is enough
            remaining = next_power_2(block_count) - block_count
        return [bytes(HASH_SIZE) for _ in range(remaining)]

    def process_file(self, data):
        """
        Calculate layer hashes for contents of file.

        Parameters
        ----------
        data : BytesIO
            File opened in binary read mode.
        """
        # One buffer is reused for every read; each block is sliced to the
        # number of bytes actually read before it is hashed.
        block = bytearray(BLOCK_SIZE)
        while True:
            # bytes still missing from the current piece
            plength = self.piece_length
            blocks = []
            piece = sha1()  # nosec
            for _ in range(self.amount):
                size = data.readinto(block)
                self.progbar.update(size)
                if not size:
                    break
                plength -= size
                blocks.append(sha256(block[:size]).digest())
                piece.update(block[:size])
            # no blocks read means eof
            if not blocks:
                break
            if len(blocks) != self.amount:
                blocks.extend(self._pad_remaining(len(blocks)))
            layer_hash = merkle_root(blocks)
            self.cb(layer_hash)
            self.layer_hashes.append(layer_hash)
            if plength > 0:
                # the piece came up short: record a padding file entry and
                # hash the missing bytes as zeroes for the sha1 piece
                self.padding_file = {
                    "attr": "p",
                    "length": plength,
                    "path": [".pad", str(plength)],
                }
                piece.update(bytes(plength))
            self.pieces.append(piece.digest())  # nosec
        if self.progress == 1:
            self.progbar.close_out()
        self._calculate_root()

    def _calculate_root(self):
        """
        Calculate the root hash for opened file.

        Stores the concatenated piece hashes in `piece_layer`, pads
        `layer_hashes` in place up to a power of two and stores the
        resulting merkle root in `root`.

        **DEPRECATED**
        """
        self.piece_layer = b"".join(self.layer_hashes)

        count = len(self.layer_hashes)
        if count > 1:
            remainder = next_power_2(count) - count
            if remainder:
                # root of a piece made only of zero hashes
                pad_piece = merkle_root(
                    [bytes(HASH_SIZE) for _ in range(self.amount)]
                )
                self.layer_hashes += [pad_piece for _ in range(remainder)]
        self.root = merkle_root(self.layer_hashes)

422 

423 

class FileHasher(CbMixin, ProgMixin):
    """
    Calculate root and piece hashes for creating hybrid torrent file.

    Create merkle tree layers from sha256 hashed 16KiB blocks of contents.
    With a branching factor of 2, merge layer hashes until blocks equal
    piece_length bytes for the piece layer, and then the root hash.

    The instance is its own iterator and hashes one piece per step.  The
    file is opened by the constructor and closed when iteration reaches
    the end of the file.

    Parameters
    ----------
    path : str
        path to target file.
    piece_length : int
        piece length for data chunks.
    progress: int
        the progress mode
    hybrid : bool
        flag to indicate if it's a hybrid torrent
    progress_bar: [Optional] ProgressBar
        a progress bar object if progress mode is 2
    """

    def __init__(
        self,
        path: str,
        piece_length: int,
        progress: int = 1,
        hybrid: bool = False,
        progress_bar=None,
    ):
        """
        Construct Hasher class instances for each file in torrent.
        """
        self.path = path
        self.piece_length = piece_length
        self.pieces = []
        self.layer_hashes = []
        self.piece_layer = None
        self.root = None
        self.padding_piece = None
        self.padding_file = None
        # number of 16KiB blocks that make up one piece
        self.amount = piece_length // BLOCK_SIZE
        # set once a read returns no data (end of file reached)
        self.end = False
        self.progress = progress
        self.progbar = progress_bar
        if self.progress == 1:
            size = os.path.getsize(self.path)
            self.progbar = self.get_progress_tracker(size, self.path)
        self.current = open(path, "rb")
        self.hybrid = hybrid

    def __iter__(self):
        """Return `self`: needed to implement iterator implementation."""
        return self

    def _pad_remaining(self, block_count: int):
        """
        Generate Hash sized, 0 filled bytes for padding.

        Parameters
        ----------
        block_count : int
            current total number of blocks collected.

        Returns
        -------
        padding : list
            Zero-filled hashes to fill remaining portion of tree.
        """
        if self.layer_hashes:
            # short final piece of a multi-piece file: fill a whole piece
            remaining = self.amount - block_count
        else:
            # the whole file fits in one piece: next power of two is enough
            remaining = next_power_2(block_count) - block_count
        return [bytes(HASH_SIZE) for _ in range(remaining)]

    def __next__(self):
        """
        Calculate layer hashes for contents of file.

        Returns
        -------
        bytes or tuple
            The layer merkle root hash; in hybrid mode a tuple of
            (layer merkle root hash, sha1 piece hash).

        Raises
        ------
        StopIteration
            Halts the iterator from progressing; raised again on any
            later call once the file has been closed.
        """
        # The file is only closed by `_calculate_root`, i.e. after the end
        # of the file was reached, so a closed file means "exhausted".
        if self.end or self.current.closed:
            self.end = False
            raise StopIteration
        # bytes still missing from the current piece
        plength = self.piece_length
        blocks = []
        piece = sha1()  # nosec
        block = bytearray(BLOCK_SIZE)
        for _ in range(self.amount):
            size = self.current.readinto(block)
            self.progbar.update(size)
            if not size:
                self.end = True
                break
            plength -= size
            blocks.append(sha256(block[:size]).digest())
            if self.hybrid:
                piece.update(block[:size])
        if not blocks:
            # The previous piece ended exactly at the end of the file (or
            # the file is empty): finish the same way as the other exit.
            if self.progress == 1:
                self.progbar.close_out()
            self._calculate_root()
            raise StopIteration
        if len(blocks) != self.amount:
            blocks.extend(self._pad_remaining(len(blocks)))
        layer_hash = merkle_root(blocks)
        self.layer_hashes.append(layer_hash)
        self.cb(layer_hash)
        if self.end:
            # end of file hit while reading this piece
            if self.progress == 1:
                self.progbar.close_out()
            self._calculate_root()
        if self.hybrid:
            if plength > 0:
                # the piece came up short: record a padding file entry and
                # hash the missing bytes as zeroes for the sha1 piece
                self.padding_file = {
                    "attr": "p",
                    "length": plength,
                    "path": [".pad", str(plength)],
                }
                piece.update(bytes(plength))
            piece = piece.digest()
            self.pieces.append(piece)
            return layer_hash, piece
        return layer_hash

    def _calculate_root(self):
        """
        Calculate the root hash for opened file.

        Stores the concatenated piece hashes in `piece_layer`, pads
        `layer_hashes` in place up to a power of two, stores the
        resulting merkle root in `root` and closes the file.
        """
        self.piece_layer = b"".join(self.layer_hashes)

        count = len(self.layer_hashes)
        if count > 1:
            remainder = next_power_2(count) - count
            if remainder:
                # root of a piece made only of zero hashes
                pad_piece = merkle_root(
                    [bytes(HASH_SIZE) for _ in range(self.amount)]
                )
                self.layer_hashes += [pad_piece for _ in range(remainder)]
        self.root = merkle_root(self.layer_hashes)
        self.current.close()