Skip to content

dedup

Bases: FileOperation, FileRemoverMixin

Source code in file_operations/deduplicate.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
class DedupOperation(FileOperation, FileRemoverMixin):
    """
    File operation that looks for duplicate files and removes them once
    removal has been confirmed.
    """

    def __init__(self, **kwargs):
        """
        find duplicate files in source folder

        :param kwargs: params from CLI; ``filetype``, ``method`` and ``remove``
            fall back to the matching ``self.settings`` values when absent
        :raises KeyError: if ``filetype`` has no comparer registered in
            ``self.mapping``
        """
        super().__init__(**kwargs)
        # filetype -> comparer class used to detect duplicates of that type
        self.mapping = {
            Constants.image: ImageComparer
        }

        self.filetype = kwargs.get("filetype", self.settings.filetype)
        self.method = kwargs.get("method", self.settings.method)
        self.remove = kwargs.get("remove", self.settings.remove)

        try:
            comparer_cls = self.mapping[self.filetype]
        except KeyError:
            # Same exception type as the plain lookup raised, but with a
            # message that tells the user which values are accepted.
            supported = ", ".join(str(key) for key in self.mapping)
            raise KeyError(
                f"unsupported filetype {self.filetype!r}; supported: {supported}"
            ) from None
        self.comparer: ImageComparer = comparer_cls(self.settings)

    @staticmethod
    def add_arguments(settings: AppSettings, parser: argparse.ArgumentParser) -> None:
        """
        register the CLI options of the dedup operation on ``parser``

        :param settings: application settings that supply the option defaults
        :param parser: argument parser the options are added to
        """
        # NOTE(review): none of the options below passes ``type=``, so values
        # given on the command line arrive as strings while the defaults keep
        # whatever type the settings hold -- confirm that the consumers of
        # threshold / core_size / n_jobs convert them.
        parser.add_argument(
            Arguments.threshold,
            help=HelpStrings.threshold,
            default=settings.hash_threshold
        )
        parser.add_argument(
            Arguments.filetype,
            help=HelpStrings.filetype,
            default=settings.filetype
        )
        parser.add_argument(
            Arguments.method, Arguments.m,
            help=HelpStrings.method,
            default=settings.method
        )
        parser.add_argument(
            Arguments.remove, Arguments.rm,
            help=HelpStrings.remove,
            action="store_true"
        )
        parser.add_argument(
            Arguments.core_size,
            help=HelpStrings.core_size,
            default=settings.core_size
        )
        parser.add_argument(
            Arguments.n_jobs,
            help=HelpStrings.n_jobs,
            default=settings.n_jobs
        )
        parser.add_argument(
            Arguments.cache_name,
            help=HelpStrings.cache_name,
            default=None
        )

    def do_task(self):
        """
        find duplicate files in source folder and remove them once removal
        is confirmed by ``confirm_removing``
        """
        duplicates = self.comparer.compare(self.files_for_task)
        self.logger.info(f"Found {len(duplicates)} duplicates in {len(self.files_for_task)} files")

        # confirm_removing() is only consulted when there is something to remove
        if len(duplicates) > 0 and self.confirm_removing():
            self._remove_all(duplicates)

    def confirm_removing(self) -> bool:
        """
        check if user wants to remove duplicates

        :return: True when ``self.remove`` is set or the typed answer
            (lower-cased) is in ``self.settings.confirm_choice``; False
            otherwise, including when no answer can be read from stdin
        """
        if self.remove:
            return True
        try:
            user_choice = input("for deleting founded duplicate files type 'delete': ")
        except EOFError:
            # stdin is closed or exhausted (e.g. a non-interactive run):
            # nothing was confirmed, so keep the files instead of crashing
            return False
        return user_choice.lower() in self.settings.confirm_choice

__init__(**kwargs)

Find duplicate files in the source folder. Parameter `kwargs`: params from the CLI.

Source code in file_operations/deduplicate.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
def __init__(self, **kwargs):
    """
    find duplicate files in source folder

    :param kwargs: params from CLI; ``filetype``, ``method`` and ``remove``
        fall back to the matching ``self.settings`` values when absent
    :raises KeyError: if ``filetype`` has no comparer registered in
        ``self.mapping``
    """
    super().__init__(**kwargs)
    # filetype -> comparer class used to detect duplicates of that type
    self.mapping = {
        Constants.image: ImageComparer
    }

    self.filetype = kwargs.get("filetype", self.settings.filetype)
    self.method = kwargs.get("method", self.settings.method)
    self.remove = kwargs.get("remove", self.settings.remove)

    try:
        comparer_cls = self.mapping[self.filetype]
    except KeyError:
        # Same exception type as the plain lookup raised, but with a
        # message that tells the user which values are accepted.
        supported = ", ".join(str(key) for key in self.mapping)
        raise KeyError(
            f"unsupported filetype {self.filetype!r}; supported: {supported}"
        ) from None
    self.comparer: ImageComparer = comparer_cls(self.settings)

confirm_removing()

Check whether the user wants to remove the duplicates.

Source code in file_operations/deduplicate.py
75
76
77
78
79
80
def confirm_removing(self) -> bool:
    """
    check if user wants to remove duplicates

    :return: True when ``self.remove`` is set or the typed answer
        (lower-cased) is in ``self.settings.confirm_choice``; False
        otherwise, including when no answer can be read from stdin
    """
    if self.remove:
        return True
    try:
        user_choice = input("for deleting founded duplicate files type 'delete': ")
    except EOFError:
        # stdin is closed or exhausted (e.g. a non-interactive run):
        # nothing was confirmed, so keep the files instead of crashing
        return False
    return user_choice.lower() in self.settings.confirm_choice

do_task()

Find duplicate files in the source folder and delete them once removal is confirmed (automatically when `remove` is set, otherwise by asking the user).

Source code in file_operations/deduplicate.py
65
66
67
68
69
70
71
72
73
def do_task(self):
    """
    Compare the files selected for this task, log how many duplicates were
    found and, once removal is confirmed, remove them.
    """
    duplicates = self.comparer.compare(self.files_for_task)
    self.logger.info(f"Found {len(duplicates)} duplicates in {len(self.files_for_task)} files")

    # Nothing was found: do not even ask for confirmation.
    if len(duplicates) == 0:
        return
    # Removal was not confirmed: leave every file in place.
    if not self.confirm_removing():
        return
    self._remove_all(duplicates)