Coverage for torrentfile\recheck.py: 100%
278 statements
« prev ^ index » next coverage.py v7.3.0, created at 2023-08-27 21:50 -0700
« prev ^ index » next coverage.py v7.3.0, created at 2023-08-27 21:50 -0700
1#! /usr/bin/python3
2# -*- coding: utf-8 -*-
4##############################################################################
5# Copyright (C) 2021-current alexpdev
6#
7# Licensed under the Apache License, Version 2.0 (the "License");
8# you may not use this file except in compliance with the License.
9# You may obtain a copy of the License at
10#
11# http://www.apache.org/licenses/LICENSE-2.0
12#
13# Unless required by applicable law or agreed to in writing, software
14# distributed under the License is distributed on an "AS IS" BASIS,
15# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16# See the License for the specific language governing permissions and
17# limitations under the License.
18##############################################################################
"""
Module containing the Checker class.

The Checker class takes a torrentfile and the path to its contents.
It will then iterate through every file and directory contained
and compare their data to values contained within the torrent file.
Completion percentages will be printed to screen for each file and
at the end for the torrentfile as a whole.
"""
29import os
30import logging
31from hashlib import sha1, sha256 # nosec
32from pathlib import Path
34import pyben
36from torrentfile.hasher import FileHasher
37from torrentfile.mixins import ProgMixin
38from torrentfile.utils import ArgumentError, MissingPathError
40SHA1 = 20
41SHA256 = 32
42BLOCK_SIZE = 2**14 # 16KiB
44logger = logging.getLogger(__name__)
class Checker:
    """
    Check a given file or directory to see if it matches a torrentfile.

    Public constructor for Checker class instance.

    Parameters
    ----------
    metafile : str
        Path to ".torrent" file.
    path : str
        Path where the content is located in filesystem.

    Example
    -------
    >> metafile = "/path/to/torrentfile/content_file_or_dir.torrent"
    >> location = "/path/to/location"
    >> os.path.exists("/path/to/location/content_file_or_dir")
    Out: True
    >> checker = Checker(metafile, location)
    """

    # Optional callback registered by 3rd-party programs; receives every
    # INFO-level log message as it is produced.
    _hook = None

    def __init__(self, metafile: str, path: str):
        """
        Validate data against hashes contained in .torrent file.

        Parameters
        ----------
        metafile : str
            path to .torrent file
        path : str
            path to content or contents parent directory.

        Raises
        ------
        FileNotFoundError
            if the metafile does not exist on disk.
        ArgumentError
            if the metafile path is a directory instead of a file.
        """
        if not os.path.exists(metafile):
            raise FileNotFoundError(metafile)
        if os.path.isdir(metafile):
            raise ArgumentError(
                "The <metafile> must be a .torrent file. Not a directory")
        self.last_log = None
        self.log_msg("Checking: %s, %s", metafile, path)
        self.metafile = metafile
        self.total = 0
        self.paths = []
        self.fileinfo = {}
        # Defined up-front so results() never hits an AttributeError even
        # when iter_hashes() produces no pieces at all.
        self._result = 0
        print("Extracting data from torrent file...")
        self.meta = pyben.load(metafile)
        self.info = self.meta["info"]
        self.name = self.info["name"]
        self.piece_length = self.info["piece length"]

        # "meta version" marks a v2 torrent; if v1-style "pieces" are also
        # present the torrent is hybrid (tracked here as version 3).
        if "meta version" in self.info:
            if "pieces" in self.info:
                self.meta_version = 3
            else:
                self.meta_version = 2
        else:
            self.meta_version = 1

        self.root = self.find_root(path)
        self.check_paths()

    @classmethod
    def register_callback(cls, hook):
        """
        Register hooks from 3rd party programs to access generated info.

        Parameters
        ----------
        hook : function
            callback function for the logging feature.
        """
        cls._hook = hook

    def piece_checker(self):
        """
        Check individual pieces of the torrent.

        Returns
        -------
        HashChecker | FeedChecker
            Individual piece hasher class appropriate for the meta version.
        """
        if self.meta_version == 1:
            return FeedChecker
        return HashChecker

    def results(self):
        """
        Generate result percentage and store for future calls.

        Returns
        -------
        float
            overall percentage of torrent data matched on disk.
        """
        # Exhaust the generator for its side effects (progress logging and
        # the _hook callback); the per-piece tuples are not needed here.
        for _ in self.iter_hashes():
            pass

        self.log_msg("Final result for %s recheck: %s", self.metafile,
                     self._result)

        return self._result

    def log_msg(self, *args, level: int = logging.INFO):
        """
        Log message `msg` to logger and send `msg` to callback hook.

        Parameters
        ----------
        *args : dict
            formatting args for log message
        level : int
            Log level for this message; default=`logging.INFO`
        """
        message = args[0]
        if len(args) >= 3:
            message = message % tuple(args[1:])
        elif len(args) == 2:
            message = message % args[1]

        # Repeat log messages should be ignored.
        if message != self.last_log:
            self.last_log = message
            logger.log(level, message)
            # Only INFO-level messages are forwarded to the 3rd-party hook.
            if self._hook and level == logging.INFO:
                self._hook(message)

    def find_root(self, path: str) -> Path:
        """
        Check path for torrent content.

        The path can be a relative or absolute filesystem path. In the case
        where the content is a single file, the path may point directly to the
        the file, or it may point to the parent directory. If content points
        to a directory. The directory will be checked to see if it matches
        the torrent's name, if not the directories contents will be searched.
        The returned value will be the absolute path that matches the torrent's
        name.

        Parameters
        ----------
        path : str
            root path to torrent content

        Returns
        -------
        Path
            root path to content

        Raises
        ------
        FileNotFoundError
            if the content cannot be located at or under `path`.
        """
        if not os.path.exists(path):
            self.log_msg("Could not locate torrent content %s.", path)
            raise FileNotFoundError(path)

        root = Path(path)
        if root.name == self.name:
            self.log_msg("Content found: %s.", str(root))
            return root

        # path is a parent directory: look for the torrent name inside it.
        if self.name in os.listdir(root):
            return root / self.name

        self.log_msg("Could not locate torrent content in: %s", str(root))
        raise FileNotFoundError(root)

    def check_paths(self):
        """
        Gather all file paths described in the torrent file.
        """
        finfo = self.fileinfo

        if "length" in self.info:
            # Single-file torrent: the root path IS the file.
            self.log_msg("%s points to a single file", self.root)
            self.total = self.info["length"]
            self.paths.append(str(self.root))

            finfo[0] = {
                "path": self.root,
                "length": self.info["length"],
            }
            if self.meta_version > 1:
                root = self.info["file tree"][self.name][""]["pieces root"]
                finfo[0]["pieces root"] = root

            return

        # Otherwise Content is more than 1 file.
        self.log_msg("%s points to a directory", self.root)
        if self.meta_version == 1:
            for i, item in enumerate(self.info["files"]):
                self.total += item["length"]
                base = os.path.join(*item["path"])

                self.fileinfo[i] = {
                    "path": str(self.root / base),
                    "length": item["length"],
                }
                self.paths.append(str(self.root / base))
            return

        # v2 / hybrid: walk the nested "file tree" dictionary instead.
        self.walk_file_tree(self.info["file tree"], [])

    def walk_file_tree(self, tree: dict, partials: list):
        """
        Traverse File Tree dictionary to get file details.

        Extract full pathnames, length, root hash, and layer hashes
        for each file included in the .torrent's file tree.

        Parameters
        ----------
        tree : dict
            File Tree dict extracted from torrent file.
        partials : list
            list of intermediate pathnames.
        """
        for key, val in tree.items():
            # An empty-string key means this node is a file leaf.
            if "" in val:
                base = os.path.join(*partials, key)
                length = val[""]["length"]
                # Zero-length files carry no "pieces root" key.
                roothash = None if not length else val[""]["pieces root"]
                full = str(self.root / base)
                self.fileinfo[len(self.paths)] = {
                    "path": full,
                    "length": length,
                    "pieces root": roothash,
                }
                self.paths.append(full)
                self.total += length
            else:
                # Directory node: descend with the path segment recorded.
                self.walk_file_tree(val, partials + [key])

    def iter_hashes(self) -> tuple:
        """
        Produce results of comparing torrent contents piece by piece.

        Yields
        ------
        chunck : bytes
            hash of data found on disk
        piece : bytes
            hash of data when complete and correct
        path : str
            path to file being hashed
        size : int
            length of bytes hashed for piece
        """
        matched = consumed = 0
        checker = self.piece_checker()
        for chunk, piece, path, size in checker(self):
            consumed += size
            if chunk == piece:
                matched += size
            yield chunk, piece, path, size
            # Guard both divisions: a zero-length file yields a zero-size
            # piece, and an empty torrent has total == 0.
            total_consumed = str(
                int(consumed / self.total * 100)) if self.total else "0"
            percent_matched = str(
                int(matched / consumed * 100)) if consumed else "0"
            self.log_msg(
                "Processed: %s%%, Matched: %s%%",
                total_consumed,
                percent_matched,
            )
        self._result = (matched / consumed) * 100 if consumed > 0 else 0
class FeedChecker(ProgMixin):
    """
    Validate torrent content.

    Seamlessly validate torrent file contents by comparing hashes in
    metafile against data on disk.

    Parameters
    ----------
    checker : object
        the checker class instance.
    """

    def __init__(self, checker: Checker):
        """
        Generate hashes of piece length data from filelist contents.
        """
        self.piece_length = checker.piece_length
        self.paths = checker.paths
        self.pieces = checker.info["pieces"]  # concatenated v1 SHA-1 digests
        self.fileinfo = checker.fileinfo
        self.piece_map = {}
        self.index = 0        # index of the file currently being read
        self.piece_count = 0  # number of pieces hashed so far
        self.it = None        # generator created lazily by __iter__

    def __iter__(self):
        """
        Assign iterator and return self.
        """
        self.it = self.iter_pieces()
        return self

    def __next__(self):
        """
        Yield back result of comparison.
        """
        # StopIteration from the exhausted generator propagates naturally;
        # catching it only to re-raise it added nothing.
        partial = next(self.it)

        chunk = sha1(partial).digest()  # nosec
        # Each SHA1-sized slice of self.pieces is the expected digest for
        # the piece at self.piece_count.
        start = self.piece_count * SHA1
        end = start + SHA1
        piece = self.pieces[start:end]
        self.piece_count += 1
        path = self.paths[self.index]
        return chunk, piece, path, len(partial)

    def iter_pieces(self):
        """
        Iterate through, and hash pieces of torrent contents.

        Yields
        ------
        piece : bytes
            hash digest for block of torrent data.
        """
        partial = bytearray()
        for i, path in enumerate(self.paths):
            total = self.fileinfo[i]["length"]
            self.progbar = self.get_progress_tracker(total, path)
            self.index = i
            if os.path.exists(path):
                for piece in self.extract(path, partial):
                    # Only full pieces are emitted, except at the very last
                    # file where the trailing partial piece is emitted too.
                    if (len(piece) == self.piece_length) or (i + 1 == len(
                            self.paths)):
                        yield piece
                    else:
                        partial = piece
            else:
                # Missing file: substitute zero-padding of the same length
                # so subsequent piece boundaries still line up.
                length = self.fileinfo[i]["length"]
                for pad in self._gen_padding(partial, length):
                    if len(pad) == self.piece_length:
                        yield pad
                    else:
                        partial = pad
            self.progbar.close_out()

    def extract(self, path: str, partial: bytearray):
        """
        Split file paths contents into blocks of data for hash pieces.

        Parameters
        ----------
        path : str
            path to content.
        partial : bytes
            any remaining content from last file.

        Yields
        ------
        bytearray
            piece-length blocks of .torrent contents.
        """
        read = 0
        length = self.fileinfo[self.index]["length"]
        # A full partial was already yielded by the caller; start fresh.
        partial = bytearray() if len(partial) == self.piece_length else partial
        if path not in self.paths:  # pragma: no cover
            raise MissingPathError(path)
        with open(path, "rb") as current:
            while True:
                bitlength = self.piece_length - len(partial)
                part = bytearray(bitlength)
                amount = current.readinto(part)
                read += amount
                partial.extend(part[:amount])
                if amount < bitlength:
                    # Short read == end of file; yield the tail only when
                    # the file is complete on disk.
                    if amount > 0 and read == length:
                        self.progbar.update(amount)
                        yield partial
                    break
                self.progbar.update(amount)
                yield partial
                partial = bytearray(0)
        if length != read:
            # File on disk is shorter than the metafile says: pad it out.
            for pad in self._gen_padding(partial, length, read):
                yield pad

    def _gen_padding(self, partial: bytes, length: int, read=0) -> bytes:
        """
        Create padded pieces where file sizes do not match.

        Parameters
        ----------
        partial : bytes
            any remaining data from last file processed.
        length : int
            size of space that needs padding
        read : int
            portion of length already padded

        Yields
        ------
        bytes
            A piece length sized block of zeros.
        """
        while read < length:
            left = self.piece_length - len(partial)
            if length - read > left:
                padding = bytearray(left)
                partial.extend(padding)
                yield partial
                read += left
                partial = bytearray(0)
            else:
                partial.extend(bytearray(length - read))
                read = length
                yield partial
class HashChecker(ProgMixin):
    """
    Iterate through contents of meta data and verify with file contents.

    Used when the torrent has a "file tree" (meta version 2 or hybrid):
    each file's expected hashes come from its "pieces root" and, for
    multi-piece files, from the metafile's "piece layers" dictionary.

    Parameters
    ----------
    checker : Checker
        the checker instance that maintains variables.
    """

    def __init__(self, checker: Checker):
        """
        Construct a HashChecker instance.
        """
        self.checker = checker
        self.paths = checker.paths
        self.piece_length = checker.piece_length
        self.fileinfo = checker.fileinfo
        self.piece_layers = checker.meta["piece layers"]
        self.current = None  # path of the file currently being hashed
        self.index = -1      # advanced to 0 by the first next_file() call

    def __iter__(self):
        """
        Assign iterator and return self.
        """
        return self

    def __next__(self):
        """
        Provide the result of comparison.
        """
        # Lazily prime the first file on the first call.
        if self.current is None:
            self.next_file()
        try:
            return self.process_current()
        except StopIteration as itererr:
            # Current file exhausted; advance to the next one if any.
            if self.next_file():
                return self.process_current()
            raise StopIteration from itererr

    class Padder:
        """
        Padding class to generate padding hashes wherever needed.

        Stands in for a missing file by producing the SHA-256 digests
        its zero-filled contents would have.

        Parameters
        ----------
        length: int
            the total size of the mock file generating padding for.
        piece_length : int
            the block size that each hash represents.
        """

        def __init__(self, length, piece_length):
            """
            Construct padding class to Mock missing or incomplete files.

            Parameters
            ----------
            length : int
                size of the file
            piece_length : int
                the piece length for each iteration.
            """
            self.length = length
            self.piece_length = piece_length
            # Digest of one full piece of zeros, reused for every full piece.
            self.pad = sha256(bytearray(piece_length)).digest()

        def __iter__(self):
            """
            Return self to correctly implement iterator type.
            """
            return self  # pragma: nocover

        def __next__(self) -> bytes:
            """
            Iterate through seemingly endless sha256 hashes of zeros.

            Returns
            -------
            bytes
                the padding digest for the next zero-filled piece

            Raises
            ------
            StopIteration
            """
            if self.length >= self.piece_length:
                self.length -= self.piece_length
                return self.pad
            if self.length > 0:
                # Final short piece: hash only the remaining zero bytes.
                pad = sha256(bytearray(self.length)).digest()
                self.length -= self.length
                return pad
            raise StopIteration

    def next_file(self) -> bool:
        """
        Remove all references to processed files and prepare for the next.

        Returns
        -------
        bool
            if there is a next file found
        """
        self.index += 1
        if self.current is None or self.index < len(self.paths):
            self.current = self.paths[self.index]
            self.length = self.fileinfo[self.index]["length"]
            self.root_hash = self.fileinfo[self.index]["pieces root"]
            # Files larger than one piece have a layer of per-piece hashes;
            # single-piece files are verified against the root hash alone.
            if self.length > self.piece_length:
                self.pieces = self.piece_layers[self.root_hash]
            else:
                self.pieces = self.root_hash
            path = self.paths[self.index]
            self.progbar = self.get_progress_tracker(self.length, path)
            self.count = 0
            if os.path.exists(self.current):
                self.hasher = FileHasher(
                    path,
                    self.piece_length,
                    progress=2,
                    progress_bar=self.progbar,
                )
            else:
                # Missing file: hash zero-padding of the expected length.
                self.hasher = self.Padder(self.length, self.piece_length)
            return True
        if self.index >= len(self.paths):
            # Tear down per-file state once every path has been processed.
            del self.current
            del self.length
            del self.root_hash
            del self.pieces
            return False

    def process_current(self) -> tuple:
        """
        Gather necessary information to compare to metafile details.

        Returns
        -------
        tuple
            a tuple containing the layer, piece, current path and size

        Raises
        ------
        StopIteration
        """
        try:
            layer = next(self.hasher)
            piece, size = self.advance()
            self.progbar.update(size)
            return layer, piece, self.current, size
        except StopIteration as err:
            # The on-disk file ended early but the metafile expects more
            # pieces: fall back to zero-padding for the remainder.
            if self.length > 0 and self.count * SHA256 < len(self.pieces):
                self.hasher = self.Padder(self.length, self.piece_length)
                piece, size = self.advance()
                layer = next(self.hasher)
                self.progbar.update(0)
                return layer, piece, self.current, size
            raise StopIteration from err

    def advance(self) -> tuple:
        """
        Increment the number of pieces processed for the current file.

        Returns
        -------
        tuple
            the piece and size
        """
        # Slice the next expected SHA-256 digest out of the piece layer.
        start = self.count * SHA256
        end = start + SHA256
        piece = self.pieces[start:end]
        self.count += 1
        if self.length >= self.piece_length:
            self.length -= self.piece_length
            size = self.piece_length
        else:
            size = self.length
            self.length -= self.length
        return piece, size