"""
    # main.py

    - Runs Linear Regression on the loaded data.
    - Saves the resulting weights to a file.
"""

8
9
import datetime
from os import getcwd
10
11
import sys
import pymysql
12
import numpy as np
13
from preprocessing import preprocessingData
14
from linear_regression import LinearRegression
15
16


17
18
19
20
21
def storeParameters(link, filename, data):
    """Write ``data`` into today's date-stamped directory under ``link``.

    The target path is built by plain string concatenation::

        getcwd() + link + "/YYYY/YYYYMM/YYYYMMDD" + filename

    so ``link`` and ``filename`` must carry their own leading separators.

    Args:
        link: Path fragment appended directly to the current working
            directory.
        filename: File name fragment appended after the date directories.
        data: Text to write; any existing file content is replaced.

    Raises:
        OSError: If the file cannot be opened or written, e.g. when the
            date directory does not exist (it is not created here).
    """
    today = datetime.datetime.today()
    year = str(today.year)
    # Month and day are zero-padded to two digits.
    month = str(today.month).zfill(2)
    day = str(today.day).zfill(2)

    time_dir = '/' + year + '/' + year + month + '/' + year + month + day

    file_dir = getcwd() + link + time_dir + filename

    # The context manager closes the handle even if write() raises.
    with open(file_dir, "w") as out_file:
        out_file.write(data)
32
33
34
35
36


# Database connection settings, taken positionally from the command line:
#   argv[1] = host, argv[2] = user, argv[3] = password, argv[4] = database
dbconfig = {"host": sys.argv[1], "user": sys.argv[2],
            "password": sys.argv[3], "database": sys.argv[4]}

# Echo the settings for debugging, but never write the password to stdout.
print(dict(dbconfig, password="********"))

eue_db = pymysql.connect(user=dbconfig["user"], password=dbconfig["password"],
                         host=dbconfig["host"], db=dbconfig["database"], charset='utf8')
try:
    # DictCursor returns each row as a dict keyed by column name.
    cursor = eue_db.cursor(pymysql.cursors.DictCursor)
    try:
        query = "SELECT ID,DATALINK FROM USER;"
        cursor.execute(query)
        # All rows are fetched up front, so the connection is not needed
        # afterwards and can be released immediately.
        result = cursor.fetchall()
    finally:
        cursor.close()
finally:
    eue_db.close()


# For every user: load the data, fit the regression model, print a sanity
# check, and store that user's parameters. All of this is per-user work, so
# it lives inside the loop; otherwise only the last user's parameters would
# be written, and an empty result set would raise NameError.
for userdata in result:

    print("User ID : ", userdata["ID"])
    print("Data Processing Start...")

    # Get Data links
    # ./data/DO/SGG/EMD/Users/ID
    user_datalink = userdata["DATALINK"]
    dir_ls = user_datalink.split("/")
    # ./data/DO/SGG/EMD/Outside
    outside_datalink = ("/").join(dir_ls[:-2]) + "/Outside"

    # data load
    train_x, train_t, weights, bias, mean, std_d = preprocessingData(
        user_datalink, outside_datalink)

    # linear regression
    model = LinearRegression(train_x, train_t, weights,
                             bias, learning_rate=0.05)
    model.gradientDescent()

    # ---- Test codes start ----

    print("After Linear Regression -\n")
    # One hard-coded sample, normalized with the training mean / std.
    # NOTE(review): assumes mean and std_d are column vectors with one row
    # per feature, matching the 10 rows here - confirm in preprocessing.
    test_data = np.array([[5], [20], [0], [16.87], [40], [
        1011], [0.72], [26.70], [47.00], [64]])
    test_data = (test_data - mean) / std_d
    y_hat = model.predict(test_data, model.weights, model.bias)

    print("Test Data.\n", test_data, "\n")
    print("Predict - standard deviation : ", y_hat)
    # NOTE(review): index 7 is presumably the temperature feature - confirm.
    print("Predict - temperature : ", y_hat*std_d[7][0] + mean[7][0], "\n")
    print("Cost.")
    print(model.cost_MSE(model.train_x, model.train_t,
                         model.weights, model.bias), "\n")
    print("Weights.")
    print(model.weights, "\n")
    print("Bias.")
    print(model.bias)

    # ---- Test codes end ----

    # Save the Parameters.

    # - analysis_parameters: one line "w1,w2,...,wn,bias"
    analysis_data = ",".join(
        [str(weight) for weight in model.weights[0]] + [str(model.bias)])

    storeParameters(user_datalink, "/analysis_parameters.csv", analysis_data)

    # - prediction_parameters: line 1 = means, line 2 = standard deviations
    mean_line = ",".join(str(row[0]) for row in mean)
    std_line = ",".join(str(row[0]) for row in std_d)
    prediction_data = mean_line + '\n' + std_line

    storeParameters(
        user_datalink, "/prediction_parameters.csv", prediction_data)