#!/usr/bin/env python
"""Wrapper around the ``find`` program.

It must be called as if calling find itself. The behavior is the same for
most files, except those that are inside the imap folders. For them the
module checks that the files are valid and are not marked to be unlinked
later by cyrus when the last connection holding the lock to the
corresponding mailbox is closed.
"""

import os
import re
import subprocess
import sys
import syslog

try:
    from shlex import quote as _shell_quote  # Python 3.3+
except ImportError:
    from pipes import quote as _shell_quote  # Python 2


def _to_native_str(data):
    """Return *data* as the interpreter's native ``str`` type.

    Subprocess pipes yield ``str`` on Python 2 and ``bytes`` on Python 3.
    ``os.fsdecode`` is used for the latter so that file names survive the
    conversion and can still be handed to the ``os.path`` functions.
    """
    if isinstance(data, str):
        return data
    return os.fsdecode(data)


class MailFileFilter(object):
    """Decide whether a path printed by find should be kept.

    Stores the filter code, the permanent values (commands, regex) and the
    data cached for the directory and the user of the last path tested.
    """

    def __init__(self):
        # Command prefix used to run a shell command line as the cyrus user.
        self.sudo_cyrus = ['su', 'cyrus', '-s', '/bin/sh', '-c']
        # The trailing space separates the program from its argument.
        self.cyr_getdeleted = '/usr/cyrus/bin/cyr_getdeleted '
        # Group 1 captures the "user/<name>" part of a mailbox path.
        self.regex = re.compile(r'imap-mails/(user/.*?)/')
        # Caches, refreshed when the directory / the user changes.
        self.current_dir = ''
        self.current_user = ''
        self.skip_dir = False
        self.deleted_files = []

    def __get_deleted_files(self):
        """Reload ``self.deleted_files`` for ``self.current_user``.

        Basically cyr_getdeleted does a pattern matching and outputs all the
        files that are set to be unlinked in every mailbox whose name begins
        with the string passed. e.g.: passing user/admin will return files
        from user/admin/sent, user/admin/trash, etc.

        On any failure the error is reported to syslog and the list is left
        empty, so that every file is kept.
        """
        # The mailbox name comes from a path on disk and ends up inside a
        # "sh -c" command line, so it has to be quoted for the shell.
        command = self.cyr_getdeleted + _shell_quote(self.current_user)
        prog = self.sudo_cyrus + [command]
        try:
            output = _to_native_str(subprocess.check_output(prog))
        except Exception:
            # Keep all files on error
            output = ''
            syslog.openlog('backup: find_wrapper.py error')
            syslog.syslog(syslog.LOG_ERR,
                          "Can't get deleted messages for " +
                          self.current_user)
            syslog.closelog()
        # remove empty strings
        self.deleted_files = [name for name in output.split('\n') if name]

    def __is_file_deleted(self, filename, username):
        """Return True if *filename* is in the list of files to be deleted.

        The list contains only files to be deleted in the current user's
        mailboxes, so if the filename passed belongs to another user's
        mailbox, the list is reloaded first.
        """
        # if the user changed, we need to update our file list
        if username != self.current_user:
            self.current_user = username
            self.__get_deleted_files()

        for deleted_file in self.deleted_files:
            try:
                if os.path.samefile(filename, deleted_file):
                    return True
            except OSError:
                # make sure that it was not a missing file in our list
                # that raised the exception (if so we can't return yet)
                if os.path.isfile(deleted_file):
                    return False
        return False

    def __is_dir_deleted(self, filename):
        """Return True if *filename* is in a dir marked as deleted by cyrus.

        This is the case when the dir contains a .deleted file. To avoid
        unnecessary checks, we only look again for this flag file when we
        get filenames from a different directory.

        Note that the existence of the .deleted file only means that the
        current folder is marked to be deleted one day and that all the
        message files it contains are invalid. However, the subfolders
        should not be ignored, as their messages are perfectly valid as long
        as they don't have the .deleted file themselves (and the files are
        not marked to be unlinked).
        """
        # we add the slash to directories so dirname() will return
        # the directory itself, and not its parent directory
        if os.path.isdir(filename) and not filename.endswith('/'):
            filename += '/'

        directory = os.path.dirname(filename)
        # only check again if we changed dirs
        if directory != self.current_dir:
            self.current_dir = directory
            self.skip_dir = os.path.isfile(os.path.join(directory, '.deleted'))
        return self.skip_dir

    def keep_file(self, filename):
        """Main code: return True if *filename* should be kept.

        The checks are only for imap message files, so every path not
        matched by the regex is kept.
        """
        # NOTE(review): match() anchors at the start of the path, so only
        # paths beginning with "imap-mails/" are filtered -- confirm that
        # this is how find is invoked.
        found = self.regex.match(filename)
        if found is None:
            return True
        username = found.group(1)
        return not (self.__is_dir_deleted(filename) or
                    self.__is_file_deleted(filename, username))


def main(argv):
    """Run find with *argv* and echo the paths that pass the filter.

    Because we need to filter the files found, we almost surely will be left
    behind by find and cannot expect to echo its output in realtime. So we
    read line by line from the output buffer and echo each line only after
    making sure the file is to be kept. find's stderr is merged into the
    same stream.

    Returns the exit status of find.
    """
    find_call = ['/usr/bin/find'] + argv
    find_cmd = subprocess.Popen(find_call,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT)
    mail_file_filter = MailFileFilter()

    # Echo the very bytes find produced: on Python 3 this is the binary
    # layer of stdout, on Python 2 stdout itself already accepts them.
    out = getattr(sys.stdout, 'buffer', sys.stdout)

    try:
        # readline blocks execution until we have a whole line
        # and returns an empty string on EOF
        for raw_line in iter(find_cmd.stdout.readline, b''):
            # Strip only the line terminator: leading or trailing blanks
            # are a legitimate part of a file name.
            raw_path = raw_line.rstrip(b'\n')
            if raw_path and mail_file_filter.keep_file(
                    _to_native_str(raw_path)):
                out.write(raw_path + b'\n')
    finally:
        # Release the pipe and reap the child process.
        find_cmd.stdout.close()
        returncode = find_cmd.wait()
    out.flush()
    return returncode


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage: %s " % (sys.argv[0]))
        sys.exit(1)
    # NOTE(review): main() returns find's exit status; it is not propagated
    # here so that the exit code seen by existing callers does not change.
    main(sys.argv[1:])