Longest loglines

.py

Prints the top N longest line lengths of a log, with the logarithmic ratio between consecutive lengths.

def _longest(obj, n=20):
    '''
    Print a table of the n longest distinct line lengths found in obj.

    obj may be:
      * a file-like object (anything with ``readlines``): it is rewound to
        the start when it supports seeking and then read completely;
      * a str or os.PathLike path: the file is opened as UTF-8 text;
      * any other iterable of sized items (e.g. a list of strings).

    Lengths are taken with ``len`` on each line as read, so for files the
    line terminator is counted.

    Each row shows the length, the natural logarithm of its ratio to the
    previously printed length (the first row is compared against 1; values
    not above 0.2 are shown as 0), and the number of lines of that length.

    Only the header is printed when there are no lengths to report or when
    n is not positive.  If obj is not iterable, or its items have no
    length, nothing is printed at all.  The function always returns None.
    '''
    from collections import Counter
    import math
    import os

    if hasattr(obj, 'readlines'):
        try:
            obj.seek(0)
        except (AttributeError, OSError):
            # Non-seekable stream (pipe, stdin) or no seek() at all:
            # best effort, read from the current position instead.
            pass
        c = Counter(len(z) for z in obj.readlines())
    elif isinstance(obj, (str, os.PathLike)):
        # The context manager closes the file even if decoding fails midway;
        # iterating the handle avoids holding every line in memory at once.
        with open(obj, 'r', encoding='utf8') as f:
            c = Counter(len(z) for z in f)
    else:  # treat as an iterable
        try:
            c = Counter(map(len, obj))
        except TypeError:
            # Not iterable, or the items are unsized: nothing to report.
            return

    # Honour the caller's n.  A non-positive n selects no rows; a plain
    # [-0:] slice would select every row instead.
    top = sorted(c)[-n:] if n > 0 else []

    print(format('Line length', '>15'),
          format('Ln(len/prev)', '>15'),
          format('No. of lines', '>15'), )
    prev = 1
    for i in top:
        # An empty item (length 0) has no meaningful log ratio.
        ln = math.log(i / prev) if i > 0 else 0.0
        print(format(i, '15'),
              format(ln, '15.1f') if ln > 0.2 else format(0, '15'),
              format(c[i], '15'), )
        # Never leave 0 behind as the divisor for the next row.
        prev = i if i > 0 else 1

'''
Initially written for preliminary analysis of log files.
Usage:
>>> _longest('logfile.log')
    Line length    Ln(len/prev)    No. of lines
        1242206             ...               1
        1306005               0               1
        1490750               0               1
        2338506             0.5               2
        4817360             0.7               1
        8017102             0.5               1
'''