Longest loglines
.py
Logarithmic scale for top N longest lines of some log
def _longest(obj, n=20):
    '''
    Print a table of the n greatest distinct line lengths found in obj.

    obj may be:
      * a file-like object (anything with a ``readlines`` attribute) --
        it is rewound with ``seek(0)`` and read, but not closed;
      * a str -- treated as a file name and opened as UTF-8 text;
      * any other iterable of items supporting ``len()``.

    n is the maximum number of distinct lengths to show (default 20);
    n <= 0 prints only the header.

    Each row holds: the line length, the natural logarithm of its ratio
    to the previous row's length (printed as 0 unless it exceeds 0.2),
    and the number of lines having that length.  The first row is
    compared against 1, so it shows ln(length).

    Returns None.  If obj is none of the above (not iterable, or an item
    has no ``len()``), nothing is printed.
    '''
    from collections import Counter
    import math

    if hasattr(obj, 'readlines'):
        obj.seek(0)
        c = Counter(len(z) for z in obj.readlines())
    elif isinstance(obj, str):
        # The context manager closes the file even if decoding fails;
        # iterating the handle avoids loading the whole log into memory.
        with open(obj, 'r', encoding='utf8') as f:
            c = Counter(map(len, f))
    else:  # treat as an iterable
        try:
            c = Counter(map(len, obj))
        except TypeError:
            return

    lengths = sorted(c)
    print(format('Line length', '>15'),
          format('Ln(len/prev)', '>15'),
          format('No. of lines', '>15'), )

    # Honour n.  A plain lengths[-n:] would return the whole list for n == 0.
    top = lengths[max(len(lengths) - n, 0):]

    prev = 1
    for i in top:
        # A zero-length item has no logarithm; report it as "no growth".
        ln = math.log(i / prev) if i > 0 else 0.0
        print(format(i, '15'),
              format(ln, '15.1f') if ln > 0.2 else format(0, '15'),
              format(c[i], '15'), )
        # Never let the divisor become 0 (only possible after a 0 length).
        prev = max(i, 1)
'''
Initially written for log pre-analysis.
Usage:
>>> _longest('logfile.log')
Line length Ln(len/prev) No. of lines
1242206 ... 1
1306005 0 1
1490750 0 1
2338506 0.5 2
4817360 0.7 1
8017102 0.5 1
'''