Commit 90d1881e authored by KangMin An

Create & Update: Update the process for saving data-analysis results.

parent 94720bc6
@@ -6,31 +6,41 @@
 """
 import datetime
-from server.src.data_processing.model import modeling
+import os
+import sys
 import psycopg2
-import sys
+from psycopg2.extras import Json
 from preprocessing import preprocess
+from model import modeling

 # DB environment variables
-dbconfig = {"host": sys.argv[1], "user": sys.argv[2],
-            "password": sys.argv[3], "database": sys.argv[4]}
+dbconfig = {"host": sys.argv[1], "port": sys.argv[2], "user": sys.argv[3],
+            "password": sys.argv[4], "database": sys.argv[5]}
+
+data_dir = os.getcwd() + "/src/data_processing/temp.csv"
+model_dir = os.getcwd() + "/src/data_processing/model.h5"

-today = datetime.datetime.today()
-year = str(today.year)
-month = str(today.month) if today.month >= 10 else '0'+str(today.month)
-day = str(today.day) if today.day >= 10 else '0'+str(today.day)
-collected_at = year + "-" + month + "-" + day
+
+def makeDateForm():
+    today = datetime.datetime.today()
+    year = str(today.year)
+    month = str(today.month) if today.month >= 10 else '0'+str(today.month)
+    day = str(today.day) if today.day >= 10 else '0'+str(today.day)
+    collected_at = year + "-" + month + "-" + day
+    return collected_at
+

 # Connect to the DB
 connection = psycopg2.connect(
-    database=dbconfig["database"], user=dbconfig["user"])
+    dbname=dbconfig["database"], user=dbconfig["user"], password=dbconfig["password"], host=dbconfig["host"], port=dbconfig["port"])

 # Create a cursor for DB operations
 cursor = connection.cursor()

-cursor.execute("SELECT email, loc_code, using_aircon FROM Users")
+cursor.execute("SELECT email, loc_code, using_aircon FROM \"Users\"")
 users = cursor.fetchall()

 for user in users:
@@ -38,14 +48,35 @@ for user in users:
     host = {"email": user[0], "loc_code": user[1], "using_aircon": user[2]}

     # Data preprocessing
-    standard_df, mean_df , std_df = preprocess(cursor, host)
+    standard_df, mean_df, std_df = preprocess(cursor, host)

     # Data analysis
     modeling(standard_df)

     # Save the analysis results
-    # cursor.execute("INSERT INTO \"Data_Processings\" (host,collected_at,params) VALUES (%s,%s,%s)",
-    #                (host["email"], collected_at, params))
+    collected_at = makeDateForm()
+
+    model_file = open(model_dir, 'rb')
+    model_file_data = model_file.read()
+
+    params = {"mean": mean_df.to_json(), "std": std_df.to_json()}
+
+    cursor.execute("INSERT INTO \"Data_Processings\" (host,collected_at,model_file,params) VALUES (%s,%s,%s,%s)",
+                   (host["email"],
+                    collected_at,
+                    model_file_data,
+                    Json(params),))
+
+    connection.commit()
+
+    model_file.close()
+
+if os.path.isfile(data_dir):
+    os.remove(data_dir)
+
+if os.path.isfile(model_dir):
+    os.remove(model_dir)

 # Close the cursor and connection
 cursor.close()
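The new save path serializes the trained Keras model to model.h5, stores the file's raw bytes in the model_file BLOB column, and wraps the normalization statistics in psycopg2's Json adapter so they land in the JSON params column. For orientation, here is a minimal read-back sketch, not part of this commit: the table and column names follow the diff above, while the helper name and the ordering logic are assumptions.

import tempfile

import tensorflow as tf


def load_latest_model(cursor, email):
    # Fetch the newest stored model and normalization params for one user.
    # collected_at is a zero-padded "YYYY-MM-DD" string (see makeDateForm),
    # so lexicographic ORDER BY matches chronological order.
    cursor.execute(
        'SELECT model_file, params FROM "Data_Processings"'
        ' WHERE host = %s ORDER BY collected_at DESC LIMIT 1',
        (email,))
    row = cursor.fetchone()
    if row is None:
        return None, None
    model_blob, params = row  # params arrives as an already-parsed dict
    # Keras loads .h5 models from a path, so spill the BLOB to a temp file.
    with tempfile.NamedTemporaryFile(suffix=".h5", delete=False) as f:
        f.write(bytes(model_blob))
        temp_path = f.name
    model = tf.keras.models.load_model(temp_path)
    return model, params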
-from preprocessing import standard_df
 import numpy as np
+import os
 import tensorflow as tf

 def modeling(standard_df):
-    n= len(standard_df)
+    n = len(standard_df)
     test_size = int(0.3 * n)
     train = standard_df[:-test_size]
     test = standard_df[-test_size:]

-    def make_dataset(data, label, window_size = 24):
+    def make_dataset(data, label, window_size=24):
         feature_list = []
         label_list = []
         for i in range(len(data) - window_size):
             feature_list.append(np.array(data.iloc[i:i+window_size]))
-            label_list.append(np.array(label.iloc[i + window_size ]))
+            label_list.append(np.array(label.iloc[i + window_size]))
         return np.array(feature_list), np.array(label_list)

-    feature_cols = ['temp_out','humi_out','press','wind_speed','Day sin','Day cos','Year sin','Year cos']
+    feature_cols = ['temp_out', 'humi_out', 'press',
+                    'wind_speed', 'Day sin', 'Day cos', 'Year sin', 'Year cos']
     label_cols = ['temp_out']

     train_feature = train[feature_cols]
@@ -26,23 +28,23 @@ def modeling(standard_df):
     test_feature = test[feature_cols]
     test_label = test[label_cols]

-    train_feature, train_label = make_dataset(train_feature, train_label,window_size=6)
-    test_feature, test_label = make_dataset(test_feature, test_label,window_size=6)
+    train_feature, train_label = make_dataset(
+        train_feature, train_label, window_size=6)
+    test_feature, test_label = make_dataset(
+        test_feature, test_label, window_size=6)

     model = tf.keras.Sequential([
         tf.keras.layers.LSTM(16,
                              return_sequences=False,
-                             input_shape = (6,7)),
+                             input_shape=(6, 7)),
         tf.keras.layers.Dense(1)
     ])

-    model.compile(loss = 'mse',optimizer = 'adam')
+    model.compile(loss='mse', optimizer='adam')

-    model.fit(train_feature, train_label,
-              epochs =50, batch_size = 1000)
+    # model.fit(train_feature, train_label, epochs=50, batch_size=1000)

-    model.save('/src/dataprocessing/model.h5')
+    model.save(os.getcwd() + '/src/data_processing/model.h5')

     # When using the saved model:
     # new = tf.keras.models.load_model('/src/dataprocessing/model.h5')
@@ -9,6 +9,8 @@
 import pandas as pd
 import datetime
 import numpy as np
+import os

 def preprocess(cursor, host):
     """
@@ -18,33 +20,55 @@ def preprocess(cursor, host):
     - host : user information.

     Based on the user information, looks up the outdoor and indoor weather and writes it to a CSV file.
+    After the CSV file is created, it is loaded into a pandas dataframe and normalized.
     """
+    # # On hold due to a data-collector error
+    # cursor.execute(
+    #     "SELECT t2.collected_at as \"date\", temp_out, humi_out, press, wind_speed, temp_in, humi_in, lights FROM"
+    #     + " (SELECT collected_at, temp as temp_out, humi as humi_out,press, wind_speed FROM Weather_Outs WHERE loc_code = %s) t1"
+    #     + " JOIN "
+    #     + " (SELECT collected_at, temp as temp_in, humi as humi_in, lights FROM Weather_Ins WHERE host = %s) t2"
+    #     + " ON t1.collected_at = t2.collected_at", (host["loc_code"], host["email"],))
+    # results = cursor.fetchall()
+    # file = open("/src/dataprocessing/temp.csv", 'w')
+    # # header
+    # file.write("date,temp_out,humi_out,press,wind_speed,temp_in,humi_in,lights\n")
+    # for result in results:
+    #     file.write("{0},{1},{2},{3},{4},{5},{6},{7}\n".format(
+    #         result[0], result[1], result[2], result[3], result[4], result[5], result[6], result[7]))
+    # file.close()
+
+    # Look up outdoor data for the user's region of residence
     cursor.execute(
-        "SELECT t2.collected_at as \"date\", temp_out, humi_out, press, wind_speed, temp_in, humi_in, lights FROM"
-        + " (SELECT collected_at, temp as temp_out, humi as humi_out,press, wind_speed FROM Weather_Outs WHERE loc_code = %s) t1"
-        + " JOIN "
-        + " (SELECT collected_at, temp as temp_in, humi as humi_in, lights FROM Weather_Ins WHERE host = %s) t2"
-        + " ON t1.collected_at = t2.collected_at", (host["loc_code"], host["email"],))
+        "SELECT collected_at as \"date\", temp as temp_out, humi as humi_out, press, wind_speed "
+        + "From \"Weather_Outs\" "
+        + "WHERE loc_code = %s", (host["loc_code"],)
+    )

     results = cursor.fetchall()

-    file = open("/src/dataprocessing/temp.csv", 'w')
+    file = open(os.getcwd() + "/src/data_processing/temp.csv", 'w')

     # header
-    file.write("date,temp_out,humi_out,press,wind_speed,temp_in,humi_in,lights\n")
+    file.write("date,temp_out,humi_out,press,wind_speed\n")

     for result in results:
-        file.write("{0},{1},{2},{3},{4},{5},{6},{7}\n".format(
-            result[0], result[1], result[2], result[3], result[4], result[5], result[6], result[7]))
+        file.write("{0},{1},{2},{3},{4}\n".format(
+            result[0], result[1], result[2], result[3], result[4]))

     file.close()

-    df = pd.read_csv("/src/dataprocessing/temp.csv")
+    df = pd.read_csv(os.getcwd() + "/src/data_processing/temp.csv")

     date_time = pd.to_datetime(df['date'], format='%Y-%m-%d %H:%M')
     timestamp_s = date_time.map(datetime.datetime.timestamp)

-    df = df[['temp_out','humi_out','press','wind_speed']]
+    df = df[['temp_out', 'humi_out', 'press', 'wind_speed']]

     day = 24*60*60
     year = (365.2425)*day
@@ -56,9 +80,9 @@ def preprocess(cursor, host):
     def standard(dataframe):
         mean = dataframe.mean()
         std = dataframe.std()
-        zscore = ( dataframe - mean ) / std
+        zscore = (dataframe - mean) / std
         return zscore, mean, std

-    standard_df, mean_df , std_df = standard(df)
+    standard_df, mean_df, std_df = standard(df)

-    return standard_df, mean_df , std_df
\ No newline at end of file
+    return standard_df, mean_df, std_df
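The lines elided between the two hunks above are where the 'Day sin', 'Day cos', 'Year sin', and 'Year cos' columns consumed by modeling's feature_cols are presumably built from timestamp_s and the day / year constants. Those lines are not shown in this diff; the standard periodic encoding would look roughly like this sketch (df and timestamp_s as defined inside preprocess):

import numpy as np

day = 24*60*60
year = (365.2425)*day

# Project each timestamp onto the unit circle once per day and once per
# year, so times of day and seasons become smooth, continuous features.
df['Day sin'] = np.sin(timestamp_s * (2 * np.pi / day))
df['Day cos'] = np.cos(timestamp_s * (2 * np.pi / day))
df['Year sin'] = np.sin(timestamp_s * (2 * np.pi / year))
df['Year cos'] = np.cos(timestamp_s * (2 * np.pi / year))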
@@ -18,6 +18,9 @@ export class Data_Processing extends Model {
       primaryKey: true,
       defaultValue: Date.now(),
     },
+    model_file: {
+      type: DataTypes.BLOB,
+    },
     params: {
       type: DataTypes.JSON,
     },
@@ -26,6 +26,7 @@ const dataProcessingJob = schedule.scheduleJob(rule_dataProcessing, () => {
   const pyprocess = spawn("python", [
     DATA_PROCESSING_DIR,
     envs.db.host,
+    envs.db.port,
     envs.db.user,
     envs.db.password,
     envs.db.database,
@@ -57,11 +58,22 @@ const rules_weather_out_store = {
 };

 rules_weather_out_store["00m"].minute = 0;
+rules_weather_out_store["00m"].second = 0;
+
 rules_weather_out_store["10m"].minute = 10;
+rules_weather_out_store["10m"].second = 0;
+
 rules_weather_out_store["20m"].minute = 20;
+rules_weather_out_store["20m"].second = 0;
+
 rules_weather_out_store["30m"].minute = 30;
+rules_weather_out_store["30m"].second = 0;
+
 rules_weather_out_store["40m"].minute = 40;
+rules_weather_out_store["40m"].second = 0;
+
 rules_weather_out_store["50m"].minute = 50;
+rules_weather_out_store["50m"].second = 0;

 // Register arbitrary user data
 const coordinates = [
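The added envs.db.port argument has to sit exactly where the data-processing script expects it, because that script reads its DB settings positionally from sys.argv. A quick manual check outside the scheduler, in the same spirit as the spawn call above (the script path and every credential below are placeholders):

import subprocess

# Order matches the dbconfig built at the top of the data-processing script:
# argv[1]=host, argv[2]=port, argv[3]=user, argv[4]=password, argv[5]=database
subprocess.run([
    "python", "src/data_processing/main.py",  # hypothetical DATA_PROCESSING_DIR
    "localhost", "5432", "db_user", "db_password", "weather_db",
], check=True)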