Commit 90d1881e authored by KangMin An

Create & Update: updated the process for saving data-analysis results.

parent 94720bc6
@@ -6,31 +6,41 @@
 """
 import datetime
-from server.src.data_processing.model import modeling
-import sys
 import os
 import psycopg2
+from psycopg2.extras import Json
+import sys
 from preprocessing import preprocess
+from model import modeling

 # DB environment variables
-dbconfig = {"host": sys.argv[1], "user": sys.argv[2],
-            "password": sys.argv[3], "database": sys.argv[4]}
+dbconfig = {"host": sys.argv[1], "port": sys.argv[2], "user": sys.argv[3],
+            "password": sys.argv[4], "database": sys.argv[5]}
+
+data_dir = os.getcwd() + "/src/data_processing/temp.csv"
+model_dir = os.getcwd() + "/src/data_processing/model.h5"

-today = datetime.datetime.today()
-year = str(today.year)
-month = str(today.month) if today.month >= 10 else '0'+str(today.month)
-day = str(today.day) if today.day >= 10 else '0'+str(today.day)
-collected_at = year + "-" + month + "-" + day
+def makeDateForm():
+    today = datetime.datetime.today()
+    year = str(today.year)
+    month = str(today.month) if today.month >= 10 else '0'+str(today.month)
+    day = str(today.day) if today.day >= 10 else '0'+str(today.day)
+    collected_at = year + "-" + month + "-" + day
+
+    return collected_at

 # Connect to the DB
 connection = psycopg2.connect(
-    database=dbconfig["database"], user=dbconfig["user"])
+    dbname=dbconfig["database"], user=dbconfig["user"], password=dbconfig["password"],
+    host=dbconfig["host"], port=dbconfig["port"])

 # Create a cursor for DB operations
 cursor = connection.cursor()

-cursor.execute("SELECT email, loc_code, using_aircon FROM Users")
+cursor.execute("SELECT email, loc_code, using_aircon FROM \"Users\"")
 users = cursor.fetchall()

 for user in users:
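A side note on the new `makeDateForm` helper: the manual zero-padding works, but `strftime` produces the same `YYYY-MM-DD` string in one call. A minimal alternative sketch (not part of the commit):

```python
import datetime

def makeDateForm():
    # Equivalent to the manual year/month/day padding above
    return datetime.datetime.today().strftime("%Y-%m-%d")
```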
@@ -38,15 +48,36 @@ for user in users:
     host = {"email": user[0], "loc_code": user[1], "using_aircon": user[2]}

     # Data preprocessing
-    standard_df, mean_df , std_df = preprocess(cursor, host)
+    standard_df, mean_df, std_df = preprocess(cursor, host)

     # Data analysis
     modeling(standard_df)

     # Save the analysis results
-    # cursor.execute("INSERT INTO \"Data_Processings\" (host,collected_at,params) VALUES (%s,%s,%s)",
-    #                (host["email"], collected_at, params))
+    collected_at = makeDateForm()
+
+    model_file = open(model_dir, 'rb')
+    model_file_data = model_file.read()
+
+    params = {"mean": mean_df.to_json(), "std": std_df.to_json()}
+
+    cursor.execute("INSERT INTO \"Data_Processings\" (host,collected_at,model_file,params) VALUES (%s,%s,%s,%s)",
+                   (host["email"],
+                    collected_at,
+                    model_file_data,
+                    Json(params),))
+
+    connection.commit()
+
+    model_file.close()
+
+    if os.path.isfile(data_dir):
+        os.remove(data_dir)
+
+    if os.path.isfile(model_dir):
+        os.remove(model_dir)

 # Close the cursor and connection
 cursor.close()
-connection.close()
\ No newline at end of file
+connection.close()
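The loop above stores each user's trained model as raw `.h5` bytes and the normalization statistics as JSON. Reading them back would reverse those steps; a minimal sketch, assuming the `Data_Processings` schema from this commit and an open psycopg2 cursor (the query, ordering, and variable names are illustrative, not from the commit):

```python
import tensorflow as tf

# Fetch the most recent model and normalization parameters for one user.
cursor.execute(
    'SELECT model_file, params FROM "Data_Processings" '
    'WHERE host = %s ORDER BY collected_at DESC LIMIT 1',
    (host["email"],))
model_file_data, params = cursor.fetchone()
# params arrives as a dict via psycopg2's JSON adaptation: {"mean": ..., "std": ...}

# psycopg2 returns bytea columns as a memoryview; write the bytes back
# to an .h5 file so Keras can load it.
with open(model_dir, 'wb') as f:
    f.write(bytes(model_file_data))

model = tf.keras.models.load_model(model_dir)
```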
-from preprocessing import standard_df
 import numpy as np
+import os
 import tensorflow as tf


 def modeling(standard_df):
-    n= len(standard_df)
+    n = len(standard_df)
     test_size = int(0.3 * n)
     train = standard_df[:-test_size]
     test = standard_df[-test_size:]

-    def make_dataset(data, label, window_size = 24):
+    def make_dataset(data, label, window_size=24):
         feature_list = []
         label_list = []
         for i in range(len(data) - window_size):
             feature_list.append(np.array(data.iloc[i:i+window_size]))
-            label_list.append(np.array(label.iloc[i + window_size ]))
+            label_list.append(np.array(label.iloc[i + window_size]))
         return np.array(feature_list), np.array(label_list)

-    feature_cols = ['temp_out','humi_out','press','wind_speed','Day sin','Day cos','Year sin','Year cos']
+    feature_cols = ['temp_out', 'humi_out', 'press',
+                    'wind_speed', 'Day sin', 'Day cos', 'Year sin', 'Year cos']
     label_cols = ['temp_out']

     train_feature = train[feature_cols]
     train_label = train[label_cols]
     test_feature = test[feature_cols]
     test_label = test[label_cols]

-    train_feature, train_label = make_dataset(train_feature, train_label,window_size=6)
-    test_feature, test_label = make_dataset(test_feature, test_label,window_size=6)
+    train_feature, train_label = make_dataset(
+        train_feature, train_label, window_size=6)
+    test_feature, test_label = make_dataset(
+        test_feature, test_label, window_size=6)

     model = tf.keras.Sequential([
         tf.keras.layers.LSTM(16,
                              return_sequences=False,
-                             input_shape = (6,7)),
+                             input_shape=(6, 8)),  # 6-step window x 8 feature columns
         tf.keras.layers.Dense(1)
     ])

-    model.compile(loss = 'mse',optimizer = 'adam')
-    model.fit(train_feature, train_label,
-              epochs =50, batch_size = 1000)
-    model.save('/src/dataprocessing/model.h5')
+    model.compile(loss='mse', optimizer='adam')
+    # model.fit(train_feature, train_label, epochs=50, batch_size=1000)
+    model.save(os.getcwd() + '/src/data_processing/model.h5')

-# When loading the saved model:
-# new = tf.keras.models.load_model('/src/dataprocessing/model.h5')
\ No newline at end of file
+# When loading the saved model:
+# new = tf.keras.models.load_model('/src/dataprocessing/model.h5')
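Following the `load_model` hint in the trailing comments, inference would also need to undo the z-score normalization, since the network is trained and saved on standardized values. A hedged sketch reusing the names from `modeling` and `preprocess` above:

```python
import os
import tensorflow as tf

new_model = tf.keras.models.load_model(os.getcwd() + '/src/data_processing/model.h5')

# test_feature has shape (samples, 6, 8): 6-step windows over the 8 feature columns.
pred = new_model.predict(test_feature)

# Scale predictions back to real units using the statistics from preprocess().
temp_pred = pred[:, 0] * std_df['temp_out'] + mean_df['temp_out']
```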
@@ -9,6 +9,8 @@
 import pandas as pd
 import datetime
 import numpy as np
+import os


 def preprocess(cursor, host):
     """
@@ -17,34 +19,56 @@ def preprocess(cursor, host):
     - cursor : psycopg2 cursor used to execute SQL.
     - host : user information.

     Looks up outdoor and indoor weather based on the user information and writes it to a CSV file.
+    After the CSV file is created, it is loaded into a dataframe with pandas and normalized.
     """

+    # # On hold due to a data-collector error
+    # cursor.execute(
+    #     "SELECT t2.collected_at as \"date\", temp_out, humi_out, press, wind_speed, temp_in, humi_in, lights FROM"
+    #     + " (SELECT collected_at, temp as temp_out, humi as humi_out,press, wind_speed FROM Weather_Outs WHERE loc_code = %s) t1"
+    #     + " JOIN "
+    #     + " (SELECT collected_at, temp as temp_in, humi as humi_in, lights FROM Weather_Ins WHERE host = %s) t2"
+    #     + " ON t1.collected_at = t2.collected_at", (host["loc_code"], host["email"],))
+    # results = cursor.fetchall()
+    # file = open("/src/dataprocessing/temp.csv", 'w')
+    # # header
+    # file.write("date,temp_out,humi_out,press,wind_speed,temp_in,humi_in,lights\n")
+    # for result in results:
+    #     file.write("{0},{1},{2},{3},{4},{5},{6},{7}\n".format(
+    #         result[0], result[1], result[2], result[3], result[4], result[5], result[6], result[7]))
+    # file.close()

+    # Query outdoor weather data for the user's residential area
     cursor.execute(
-        "SELECT t2.collected_at as \"date\", temp_out, humi_out, press, wind_speed, temp_in, humi_in, lights FROM"
-        + " (SELECT collected_at, temp as temp_out, humi as humi_out,press, wind_speed FROM Weather_Outs WHERE loc_code = %s) t1"
-        + " JOIN "
-        + " (SELECT collected_at, temp as temp_in, humi as humi_in, lights FROM Weather_Ins WHERE host = %s) t2"
-        + " ON t1.collected_at = t2.collected_at", (host["loc_code"], host["email"],))
+        "SELECT collected_at as \"date\", temp as temp_out, humi as humi_out, press, wind_speed "
+        + "From \"Weather_Outs\" "
+        + "WHERE loc_code = %s", (host["loc_code"],)
+    )
     results = cursor.fetchall()

-    file = open("/src/dataprocessing/temp.csv", 'w')
+    file = open(os.getcwd() + "/src/data_processing/temp.csv", 'w')

     # header
-    file.write("date,temp_out,humi_out,press,wind_speed,temp_in,humi_in,lights\n")
+    file.write("date,temp_out,humi_out,press,wind_speed\n")

     for result in results:
-        file.write("{0},{1},{2},{3},{4},{5},{6},{7}\n".format(
-            result[0], result[1], result[2], result[3], result[4], result[5], result[6], result[7]))
+        file.write("{0},{1},{2},{3},{4}\n".format(
+            result[0], result[1], result[2], result[3], result[4]))

     file.close()

-    df = pd.read_csv("/src/dataprocessing/temp.csv")
+    df = pd.read_csv(os.getcwd() + "/src/data_processing/temp.csv")

     date_time = pd.to_datetime(df['date'], format='%Y-%m-%d %H:%M')
     timestamp_s = date_time.map(datetime.datetime.timestamp)

-    df = df[['temp_out','humi_out','press','wind_speed']]
+    df = df[['temp_out', 'humi_out', 'press', 'wind_speed']]

     day = 24*60*60
     year = (365.2425)*day
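The `day` and `year` constants exist to build the periodic time features ('Day sin', 'Day cos', 'Year sin', 'Year cos') that `feature_cols` in `model.py` expects; the lines creating them fall in the elided part of this hunk. They presumably follow the usual sin/cos encoding, sketched here as an assumption rather than the commit's verbatim code:

```python
import numpy as np

# Map the raw timestamp onto daily and yearly cycles.
df['Day sin'] = np.sin(timestamp_s * (2 * np.pi / day))
df['Day cos'] = np.cos(timestamp_s * (2 * np.pi / day))
df['Year sin'] = np.sin(timestamp_s * (2 * np.pi / year))
df['Year cos'] = np.cos(timestamp_s * (2 * np.pi / year))
```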
@@ -56,9 +80,9 @@ def preprocess(cursor, host):
     def standard(dataframe):
         mean = dataframe.mean()
         std = dataframe.std()
-        zscore = ( dataframe - mean ) / std
+        zscore = (dataframe - mean) / std
         return zscore, mean, std

-    standard_df, mean_df , std_df = standard(df)
-    return standard_df, mean_df , std_df
\ No newline at end of file
+    standard_df, mean_df, std_df = standard(df)
+    return standard_df, mean_df, std_df
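`preprocess` returns `mean_df` and `std_df` as pandas Series, which the main script serializes with `.to_json()` into the `params` JSON column. Restoring them is a plain round-trip; a small sketch of hypothetical consumer code:

```python
import json
import pandas as pd

params = {"mean": mean_df.to_json(), "std": std_df.to_json()}  # as stored in the DB

mean_back = pd.Series(json.loads(params["mean"]))  # column name -> mean
std_back = pd.Series(json.loads(params["std"]))    # column name -> std
```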
@@ -18,6 +18,9 @@ export class Data_Processing extends Model {
       primaryKey: true,
       defaultValue: Date.now(),
     },
+    model_file: {
+      type: DataTypes.BLOB,
+    },
     params: {
       type: DataTypes.JSON,
     },
@@ -26,6 +26,7 @@ const dataProcessingJob = schedule.scheduleJob(rule_dataProcessing, () => {
   const pyprocess = spawn("python", [
     DATA_PROCESSING_DIR,
     envs.db.host,
+    envs.db.port,
     envs.db.user,
     envs.db.password,
     envs.db.database,
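Passing `envs.db.port` here is what the five-argument `dbconfig` in the Python script relies on; without it, `user`, `password`, and `database` would each shift one position. The positional mapping, restated on the Python side:

```python
import sys

# argv[1] -> envs.db.host
# argv[2] -> envs.db.port      (the argument added in this commit)
# argv[3] -> envs.db.user
# argv[4] -> envs.db.password
# argv[5] -> envs.db.database
dbconfig = {"host": sys.argv[1], "port": sys.argv[2], "user": sys.argv[3],
            "password": sys.argv[4], "database": sys.argv[5]}
```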
@@ -57,11 +58,22 @@ const rules_weather_out_store = {
 };

 rules_weather_out_store["00m"].minute = 0;
 rules_weather_out_store["00m"].second = 0;
+rules_weather_out_store["10m"].minute = 10;
+rules_weather_out_store["10m"].second = 0;
+rules_weather_out_store["20m"].minute = 20;
+rules_weather_out_store["20m"].second = 0;
+rules_weather_out_store["30m"].minute = 30;
+rules_weather_out_store["30m"].second = 0;
+rules_weather_out_store["40m"].minute = 40;
+rules_weather_out_store["40m"].second = 0;
+rules_weather_out_store["50m"].minute = 50;
+rules_weather_out_store["50m"].second = 0;

 // Register arbitrary user data
 const coordinates = [