clean_db.py 1.51 KB
Newer Older
Monica Rainer's avatar
Monica Rainer committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
"""
Remove files from calfiles that are not present in drs_calib.db
"""

import sqlite3, os, glob
from drslib.config import CONFIG

# Settings taken from the imported CONFIG mapping, grouped in one assignment.
pathdb, tablename, pathcal = (
    CONFIG['DB_CALIB_PATH'],  # database file opened with sqlite3.connect
    CONFIG['DB_CALIB_TBL'],   # table that is queried in that database
    CONFIG['CALIB_DIR'],      # directory searched for *.fits files
)

def connect_db_cal():
    """
    Open the database file named by the module-level pathdb
    and return the resulting sqlite3 connection.
    """
    return sqlite3.connect(pathdb)

def column_names(dbcalib, tablename):
    """
    Return the names of the columns of a table in the database.

    The names are read from the cursor description of a query that
    selects no rows, so the function also works on an empty table
    (fetching a row first would fail there with an AttributeError).

    dbcalib   -- open sqlite3 connection
    tablename -- name of the table to inspect; it is inserted into the
                 SQL text as-is, so it must be a trusted identifier

    Returns a list with the column names, in the order given by
    "SELECT *".

    Side effect: the row factory of the connection is set to
    sqlite3.Row, as this function has always done.
    """
    # Kept so that the connection is left in the same state as before
    # for any caller that relies on name-based row access afterwards.
    dbcalib.row_factory = sqlite3.Row

    # LIMIT 0 prepares the statement (which fills in the description)
    # without reading any row of the table.
    cursor = dbcalib.execute("SELECT * FROM {table} LIMIT 0"
                             .format(table=tablename))
    names = [column[0] for column in cursor.description]
    return names


def check_db(dbcalib, tablename, columns, calname):
    """
    Return True if the file does not exist in the database.

    Every column in `columns` is searched, one query per column, for a
    row whose value contains `calname` as a substring (SQL instr()).

    dbcalib   -- open sqlite3 connection
    tablename -- name of the table to search; inserted into the SQL
                 text as-is, so it must be a trusted identifier
    columns   -- iterable of column names of that table
    calname   -- file name to look for (bound as a query parameter)

    Returns False as soon as one column holds a matching row, True if
    no column does. A column whose query raises sqlite3.Error is
    skipped (best effort), which counts as "no match" for that column.
    """

    cursor = dbcalib.cursor()

    for col in columns:
        # Quote the column name as an SQL identifier (embedded double
        # quotes are doubled) so names with spaces or keywords are
        # searched instead of failing with a syntax error.
        quoted = '"{0}"'.format(col.replace('"', '""'))

        # Only the existence of a matching row matters, so select a
        # constant and stop at the first hit.
        query = ("SELECT 1 FROM {table} WHERE instr({fpath}, ?) > 0 LIMIT 1"
                 .format(table=tablename, fpath=quoted))

        try:
            cursor.execute(query, (calname,))
        except sqlite3.Error:
            # This column cannot be searched: go on with the next one.
            continue

        # Test for the presence of a row, not for the truthiness of its
        # first value: a matching row may well start with 0 or NULL.
        if cursor.fetchone() is not None:
            return False

    return True


if __name__ == "__main__":
    # List the calibration files (*.fits in pathcal), in sorted order.
    cals = sorted(glob.glob(os.path.join(pathcal, "*.fits")))

    dbcal = connect_db_cal()
    try:
        columns = column_names(dbcal, tablename)

        for calpath in cals:
            # The database is searched for the bare file name, the file
            # itself is removed through its full path.
            cal = os.path.basename(calpath)

            if check_db(dbcal, tablename, columns, cal):
                os.remove(calpath)
    finally:
        # Close the connection even if a query or a removal fails.
        dbcal.close()