Python/Data Struc & algo

[Python] IIS log date Sqlite3 Memory DB and pandas DataFrame

Pydole 2018. 5. 24. 23:05

 - pandas.read_sql_query를 통해 쿼리의 결과값을 pandas DataFrame 가져올 수 있다

 


# 약 42만 라인의 IIS로그를 SQLite3 메모리DB로 생성하여 'scbytes', 'csbytes', 'timetaken' 컬럼을 pandas와 연동

 

 

 pandas.read_sql_query(쿼리, conn)

 

import sqlite3
from datetime import datetime
import pandas as pd

conn = sqlite3.connect(':memory:')
c = conn.cursor()
c.execute('''CREATE TABLE memorylogdb
             (date date, time time, sitename VARCHAR(50), computername VARCHAR(50), sip VARCHAR(20), method VARCHAR(20),
              csuristem VARCHAR(500), csuriquery VARCHAR(5000), sport VARCHAR(20), csusername VARCHAR(100), cip VARCHAR(20),
             csversion VARCHAR(100), useragent VARCHAR(2000), cookie VARCHAR(3000), referer VARCHAR(3000), cshost VARCHAR(100),
             scstatus VARCHAR(20), substatus VARCHAR(20), scwin32status VARCHAR(20), scbytes INT(50), csbytes INT(50), timetaken INT(50))''')

lst = [ i for i in range(22)]

with open(path, 'r', errors='replace') as f:
    lines = f.readlines()
    for line in lines:
        log = line.split(' ')
        if log[0][0] == '#':
            pass
        else:
            for x, y in enumerate(log):
                lst[x] = str(y)

            c.execute('''INSERT INTO memorylogdb VALUES('%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s',
             '%s', '%s', '%s', '%s', '%s', '%s', '%s')'''
                      %(lst[0],lst[1],lst[2],lst[3],lst[4],lst[5],lst[6],lst[7],lst[8],lst[9],lst[10],lst[11],lst[12],lst[13],lst[14],lst[15],lst[16],
                        lst[17],lst[18],lst[19],lst[20],lst[21]))

df = pd.read_sql_query("SELECT scbytes, csbytes, timetaken FROM memorylogdb", conn)
print(df)

        scbytes  csbytes  timetaken
0           468      398         93
1          3893      404         78
2          1398      236         15
3          1398      224         15
4          1398      228         15
5          1398      216         15
6          1398      207          0
7          1398      236         15
8          1398      224         15
9          1398      228         15
10         1398      216         15
11         1398      207         62
12         1398      250          0
13        31784     1504         46
14         4440     1944         46
15          247     2024         46
16         1398     1815         46
17         1398     1814         78
18         9523     1915         78
19         1398     1808         78
20         1398     1811         78
21         1398     1819         78
22         1398     1812        124
23         1398     1817        124
24         1398     1815        124
25         1398     1816        124
26         1398     1814        124
27         1398     1818         78
28         1398     1813         78
29         1398     1815         62
...         ...      ...        ...
419527     1398     1151         15
419528     4418     1789         46
419529      247     1964         78
419530      247     1398         62
419531      247     1356         78
419532     5921     1144         78
419533     1398     1106         15
419534     1398     1110         15
419535     1398      274         46
419536     1398     1104         46
419537     1398     1117         46
419538     1398     1109         46
419539     1398     1106         46
419540     1398     1108         31
419541     1398     1104         31
419542     1398     1109         15
419543     3469     1130         15
419544     1398     1105         15
419545     1398     1108         15
419546     1398     1103         15
419547     1398     1116         15
419548     1398     1105         15
419549     4621     1140         46
419550     1398     1107         46
419551     1810     1909         62
419552     1398     1808         31
419553     1398     1834         15
419554  4346106     1807        889
419555      475       91          0
419556    20021      105         31

[419557 rows x 3 columns]