"""Parse a logfile of TIMESTAMP headers and user@location entries into a table.

The log is expected to contain header lines holding the word ``TIMESTAMP``
and a date (``d/m/y`` style digits separated by slashes), each followed by
zero or more entry lines that hold a time, a ``user@location`` id and a
double-quoted message.  Every entry is paired with the date of the closest
preceding header.
"""

## import necessary libraries
import re

import pandas as pd

## Regex patterns for all required table attributes (raw strings, compiled once)
DATE_RE = re.compile(r'\d+/\d+/\d+')   # Date (read from the TIMESTAMP line)
TIME_RE = re.compile(r'\d+:\d+:\d+')   # Timestamp
USER_ID_RE = re.compile(r'(\S+@\S+)')  # User ID
NAME_RE = re.compile(r'\w+@')          # Name/ User (trailing '@' is stripped)
LOCATION_RE = re.compile(r'@\D+')      # Location (leading '@' is stripped)
MESSAGE_RE = re.compile(r'"\S+"')      # message (surrounding quotes are stripped)

COLUMNS = ['Date', 'Time', 'UserID', 'Name', 'LocationID', 'Message']


def _first_match(pattern, text, start=None, stop=None):
    """Return the first match of *pattern* in *text*, sliced ``[start:stop]``.

    Returns ``None`` instead of raising when the pattern does not occur, so
    a line with a missing field cannot abort the whole parse.
    """
    found = pattern.search(text)
    if found is None:
        return None
    return found.group(0)[start:stop]


def parse_log(lines, verbose=False):
    """Match every user entry in *lines* with the date of its TIMESTAMP header.

    Args:
        lines: list of logfile lines (as returned by ``file.readlines()``).
        verbose: when true, print ``"<line number> Found"`` for a TIMESTAMP
            line that is directly followed by a non-TIMESTAMP line and
            ``"<line number> not found"`` for every other line.

    Returns:
        A list of ``[Date, Time, UserID, Name, LocationID, Message]`` rows.
        A field that cannot be found on its line is ``None``.  Lines before
        the first TIMESTAMP header are ignored, as are lines without a
        ``user@location`` token.
    """
    records = []
    total = len(lines)
    current_date = None
    seen_timestamp = False  # entries before the first header have no date

    for index, line in enumerate(lines):
        line_no = index + 1

        if 'TIMESTAMP' in line:
            current_date = _first_match(DATE_RE, line)
            seen_timestamp = True
            if verbose:
                # Bounds check first: the header may be the last line of the log.
                next_is_entry = (line_no < total
                                 and 'TIMESTAMP' not in lines[line_no])
                print('%d %s' % (line_no,
                                 'Found' if next_is_entry else 'not found'))
            continue

        if verbose:
            print('%d not found' % line_no)
        if not seen_timestamp:
            continue

        user_id = _first_match(USER_ID_RE, line)
        if user_id is None:
            continue

        records.append([
            current_date,
            _first_match(TIME_RE, line),
            user_id,
            _first_match(NAME_RE, line, stop=-1),
            _first_match(LOCATION_RE, line, start=1),
            _first_match(MESSAGE_RE, line, start=1, stop=-1),
        ])

    return records


if __name__ == "__main__":
    # Plotting setup only matters when run as a notebook/script; importing it
    # here keeps parse_log importable without matplotlib.
    import matplotlib.pyplot as plt

    plt.style.use('fivethirtyeight')  #Five thirty eight style for all plots
    print(plt.style.available)

    # Equivalent of the notebook magic "%matplotlib inline"; skipped when the
    # file is run outside IPython/Jupyter, where get_ipython is not defined.
    try:
        _ipython = get_ipython()
    except NameError:
        _ipython = None
    if _ipython is not None:
        _ipython.run_line_magic('matplotlib', 'inline')

    ## Read in the logfile line-by-line into a list
    LOG_PATH = r' '  ###!!!!! Enter your file path between the quotes
    with open(LOG_PATH) as f:
        lines = f.readlines()

    ## Testing my Regex patterns to match all required table attributes
    SAMPLE_TIMESTAMP_LINE = 1383  # index of a line known to hold date and time
    SAMPLE_USER_LINE = 1109       # index of a line known to hold a user entry
    if len(lines) > SAMPLE_TIMESTAMP_LINE:
        sample = lines[SAMPLE_TIMESTAMP_LINE]
        print('Date: %s' % (DATE_RE.findall(sample),))  #Date
        print('Time: %s' % (TIME_RE.findall(sample),))  #Timestamp
    if len(lines) > SAMPLE_USER_LINE:
        sample = lines[SAMPLE_USER_LINE]
        print('UserID: %s' % (USER_ID_RE.findall(sample),))  #User ID
        print('Name: %s' % _first_match(NAME_RE, sample, stop=-1))  #Name/ User
        print('Location: %s'
              % _first_match(LOCATION_RE, sample, start=1))  #Location
        print('Message: %s'
              % _first_match(MESSAGE_RE, sample, start=1, stop=-1))  #message

    ## Parse the logfile and match the Date with user
    mylist = parse_log(lines, verbose=True)

    # checking length of the list with the data
    print('Parsed %d records' % len(mylist))
    # checking an entry for completeness (only when the log is long enough)
    SAMPLE_RECORD = 896
    if len(mylist) > SAMPLE_RECORD:
        print('Sample record: %s' % (mylist[SAMPLE_RECORD],))

    ##Creating a pandas dataframe from the list
    mydata = pd.DataFrame(mylist, columns=COLUMNS)

    ##Convert Date column to datetime format
    mydata['Date'] = pd.to_datetime(mydata['Date'])

    ##Export to excel file
    #mydata.to_excel(r' ') ###!!!!! Enter your output file path between the quotes