#!/usr/bin/python3
# Parse /proc/locks and list waiting programs.
# Useful for identifying the cause of a deadlock.
# Should be executed as root for /proc access.
#
# If you identified the first hanging process,
# run "gstack pid" to get a stack trace.
#
# Licensed under the same terms as cyrus-imapd 2.4 / 2.5 / 2.6
# (c) 2015 Intra2net AG - Thomas Jarosch

import os
import re
import argparse

cyrus_bindir = '/usr/cyrus/bin/'
socket_dir = '/datastore/imap-db/socket/'

# /proc/locks pads its columns with several spaces; collapse every run
# to a single space before matching.
MULTI_SPACE_RE = re.compile(r' {2,}')

# Format examples (after collapsing runs of spaces):
# 46: FLOCK ADVISORY WRITE 5542 00:25:829847 0 EOF
# 46: -> FLOCK ADVISORY WRITE 5544 00:25:829847 0 EOF
# 50: POSIX MANDATORY READ 4820 fd:04:3815033 1073741826 1073742335
LOCK_LINE_RE = re.compile(
    r'(?P<lock_id>\d+): (?P<waiting>-> )?[A-Z]+ [A-Z]+ '
    r'(?P<mode>READ|WRITE) '
    r'(?P<pid>\d+) '
    r'(?P<dev_major>[a-f0-9]+):'
    r'(?P<dev_minor>[a-f0-9]+):'
    r'(?P<inode>\d+) .*')


def build_parser():
    """Return the command line parser of this script."""
    parser = argparse.ArgumentParser(
        description='Parse /proc/locks with special '
                    'regard for cyrus imapd')
    parser.add_argument('--all-programs', action='store_true',
                        default=False,
                        help='Show all locks, not just cyrus')
    parser.add_argument('--show-sockets', action='store_true',
                        default=False,
                        help='Show locks in imap-db/socket/')
    parser.add_argument('--show-pidfiles', action='store_true',
                        default=False,
                        help='Show locks in /var/run')
    parser.add_argument('--show-non-waiting', action='store_true',
                        default=False,
                        help="Show locks that don't have waiters")
    return parser


def normalize_spaces(line):
    """Return line with every run of spaces collapsed to one space."""
    return MULTI_SPACE_RE.sub(' ', line)


def parse_lock_line(line):
    """Parse one line of /proc/locks.

    Returns a dict with the keys 'lock_id' (str), 'waiting' (bool),
    'mode' (str), 'pid' (str), 'dev' (int, comparable to st_dev of
    os.stat()) and 'inode' (int), or None if the line does not match
    the expected format.
    """
    fields = LOCK_LINE_RE.match(normalize_spaces(line))
    if not fields:
        return None

    # /proc/locks prints major and minor as separate hex numbers;
    # os.makedev() combines them the same way os.stat() reports st_dev.
    dev = os.makedev(int(fields.group('dev_major'), 16),
                     int(fields.group('dev_minor'), 16))

    return {'lock_id': fields.group('lock_id'),
            'waiting': fields.group('waiting') is not None,
            'mode': fields.group('mode'),
            'pid': fields.group('pid'),
            'dev': dev,
            'inode': int(fields.group('inode')),
            }


def find_locked_filename(proc_path, dev, inode):
    """Return the target of the fd in proc_path/fd matching dev and inode.

    Returns 'UNKNOWN' if no open file descriptor matches. Errors from
    listing the fd directory are passed on to the caller.
    """
    fd_path = os.path.join(proc_path, 'fd')
    for fd_file in os.listdir(fd_path):
        fd_fullpath = os.path.join(fd_path, fd_file)
        try:
            stat_res = os.stat(fd_fullpath)
            if stat_res.st_ino == inode and stat_res.st_dev == dev:
                return os.readlink(fd_fullpath)
        except FileNotFoundError:
            # fd was closed between listdir() and stat()/readlink()
            continue
    return 'UNKNOWN'


def resolve_lock(entry):
    """Look up program and file name for a parsed /proc/locks entry.

    Returns a lock dict with the keys 'prog', 'pid', 'mode', 'file' and
    'waiters' (empty list), or None if the process is gone. In that
    case an INFO line is printed.
    """
    pid = entry['pid']
    proc_path = os.path.join('/proc', pid)

    try:
        prog_name = os.readlink(os.path.join(proc_path, 'exe'))
        locked_filename = find_locked_filename(proc_path, entry['dev'],
                                               entry['inode'])
    except (FileNotFoundError, ProcessLookupError):
        # The process may exit at any time while we scan /proc
        print('INFO: Program with pid {0} vanished'.format(pid))
        return None

    return {'prog': prog_name,
            'pid': pid,
            'mode': entry['mode'],
            'file': locked_filename,
            'waiters': []
            }


def collect_locks(lines, resolve=resolve_lock):
    """Build the list of held locks, each with its waiters attached.

    lines is an iterable of /proc/locks lines. resolve maps a parsed
    entry to a lock dict (or None to skip the entry); it is a parameter
    so the /proc lookups can be replaced.

    A waiter line carries the same leading id as the lock it is blocked
    on, so waiters are attached by that id instead of to whatever lock
    happened to be stored last.
    """
    locks = []
    holders = {}

    for raw_line in lines:
        entry = parse_lock_line(raw_line)
        if entry is None:
            print('WARN: Ignoring unmatched line output: {0}'.format(
                  normalize_spaces(raw_line)))
            continue

        if entry['waiting'] and entry['lock_id'] not in holders:
            # The holder was skipped (e.g. vanished): there is nothing
            # sensible to attach this waiter to.
            print('INFO: Holder of lock {0} unknown, ignoring waiter '
                  'with pid {1}'.format(entry['lock_id'], entry['pid']))
            continue

        new_lock = resolve(entry)
        if new_lock is None:
            continue

        if entry['waiting']:
            holders[entry['lock_id']]['waiters'].append(new_lock)
        else:
            holders[entry['lock_id']] = new_lock
            locks.append(new_lock)

    return locks


def render_report(locks, args):
    """Return the output lines for locks, filtered by the parsed args."""
    output = []
    shown_something = False

    for lock in locks:
        prog = lock['prog']
        filename = lock['file']

        # Skip known locks that are always waiting
        if filename.startswith(socket_dir) and not args.show_sockets:
            continue
        if filename.endswith('.pid') and not args.show_pidfiles:
            continue
        if not prog.startswith(cyrus_bindir) and not args.all_programs:
            continue

        waiters = lock['waiters']
        if waiters or args.show_non_waiting:
            shown_something = True
            output.append('{0} (pid {1}) holding {2} lock for {3}'.format(
                prog, lock['pid'], lock['mode'], lock['file']))

        for waiter in waiters:
            output.append(
                '{0} (pid {1}) ++WAITING++ for {2} lock on {3}'.format(
                    waiter['prog'], waiter['pid'],
                    waiter['mode'], waiter['file']))

        if waiters:
            output.append('')

    if locks and not shown_something:
        output.append('Hint: No locks shown. '
                      'Try --all-programs or --help for more modes')

    return output


def main(argv=None):
    """Parse /proc/locks and print locks and possible waiters."""
    args = build_parser().parse_args(argv)

    # parse locks and waiters
    with open('/proc/locks') as f:
        locks = collect_locks(f)

    # Output locks and possible waiters
    for out_line in render_report(locks, args):
        print(out_line)


if __name__ == '__main__':
    main()