ASOS_AAOS_AQ_pre_mrg.py
특정 사이트 자료 ASOS AAOS AQ 모두 합치기
import keras
print(keras.__version__)
import tensorflow as tf
print(tf.__version__)
import os
#os.environ["CUDA_VISIBLE_DEVICES"] = "1" # for GPU_1
from __future__ import absolute_import, division, print_function, unicode_literals
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
import pandas as pd
from download import download
import matplotlib.pyplot as plt
import seaborn as sns
2.7.0
2.7.0
# os.system("dir C:") #, shell=True)
# os.system("dir", shell=True)
file_asos = 'D:/dataset/my_data/ASOS_merg_2016-2021_136_Andong.csv'
file_aaos = 'D:/dataset/my_data/AAOS_merg_2016-2021_972_Ahndong.csv'
file_aq = 'D:/dataset/my_data/AQ_merge_2016-2021.csv'
asos = pd.read_csv(file_asos, encoding='UTF8') # cp949 로 하면 error 발생
aaos = pd.read_csv(file_aaos, encoding='UTF8') # cp949 로 하면 Date 포맷이 asos 와 불일치 하므로 UTF-8로 고정
aq = pd.read_csv(file_aq, encoding='cp949') #, index=0)
## 만약 에러가 발생하면, UTF8, cp949를 바꿔서 몇번 해 보면(?) 에러가 사라진다.
asos.head()
136 | 안동 | 2016-01-01 00:00:00 | -3.2 | 0.0 | NaN | 9.0 | 0.7 | 0.0 | 140.0 | ... | -5.1 | 0.0 | NaN | NaN | NaN | NaN | 2016 | 1 | 1 | 0 |
136 | 안동 | 2016-01-01 01:00:00 | -3.9 | 0.0 | NaN | NaN | 0.8 | 0.0 | 140.0 | ... | -5.4 | 0.0 | NaN | NaN | NaN | NaN | 2016 | 1 | 1 | 1 |
136 | 안동 | 2016-01-01 02:00:00 | -5.3 | 0.0 | NaN | NaN | 0.2 | 0.0 | 0.0 | ... | -6.2 | 0.0 | NaN | NaN | NaN | NaN | 2016 | 1 | 1 | 2 |
136 | 안동 | 2016-01-01 03:00:00 | -4.8 | 0.0 | NaN | NaN | 1.1 | 0.0 | 140.0 | ... | -6.2 | 0.0 | NaN | NaN | NaN | NaN | 2016 | 1 | 1 | 3 |
136 | 안동 | 2016-01-01 04:00:00 | -6.3 | 0.0 | NaN | NaN | 0.4 | 0.0 | 0.0 | ... | -6.9 | 0.0 | NaN | NaN | NaN | NaN | 2016 | 1 | 1 | 4 |
5 rows × 42 columns
if asos['Site'][1] == '안동':
site_name = 'Andong'
site_name
'Andong'
asos.drop(['Site_No', 'Site', 'Tair_flag', 'WS_flag', 'WD_flag', 'RH_flag',
'Pa_flag', 'Psfc_flag','Suntime_flag','Sunrad_flag',
'Snow_cm', 'Snow_3hr_cm','Sfc_stat','Tsfc_flag',
'Cloud_cover_total','Cloud_cover_mid_low','Cloud_type','Cloud_BH_100m',
'T5cm_C','T10cm_C','T20cm_C','T30cm_C',
'Year', 'Month', 'Day', 'Hour'], inplace =True, axis=1)
asos.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 52608 entries, 0 to 52607
Data columns (total 16 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Date 52608 non-null object
1 Tair_C 52606 non-null float64
2 Rain_mm 4373 non-null float64
3 Rain_flag 10012 non-null float64
4 WS_m_s 52608 non-null float64
5 WD_16deg 52608 non-null float64
6 RH_pct 52608 non-null float64
7 Pv_hPa 52604 non-null float64
8 Td_C 52602 non-null float64
9 Pa_hPa 52604 non-null float64
10 Psfc_hPa 52605 non-null float64
11 Suntime_hr 28720 non-null float64
12 Sunrad_MJ_m2 28756 non-null float64
13 Vis_10m 52606 non-null float64
14 Phen 15943 non-null float64
15 Tsfc_C 52599 non-null float64
dtypes: float64(15), object(1)
memory usage: 6.4+ MB
aaos.drop(['Site_No', 'Site',
'Tsoil_20cm','Tsoil_30cm','Tsoil_0.5m','Tsoil_1.5m', 'Tsoil_3.0m', 'Tsoil_5.0m',
'Watrlev_cm'
], inplace=True, axis=1)
aaos.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51385 entries, 0 to 51384
Data columns (total 25 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Date 51385 non-null object
1 RH_0.5m 24687 non-null float64
2 RH_1.5m 51376 non-null float64
3 Msoil_10cm 50466 non-null float64
4 Msoil_20cm 50466 non-null float64
5 Msoil_30cm 50466 non-null float64
6 Msoil_50cm 50466 non-null float64
7 Tair_0.5m 24692 non-null float64
8 Tair_1.5m 51378 non-null float64
9 Tair_4.0m 24691 non-null float64
10 WS_1.5m 24510 non-null float64
11 WS_4.0m 24510 non-null float64
12 Tsfc_0m 51384 non-null float64
13 Tgrass_0m 50413 non-null float64
14 Tsoil_5cm 51385 non-null float64
15 Tsoil_10cm 51371 non-null float64
16 Tsoil_1.0m 51383 non-null float64
17 Radnet_MJ_m2 24694 non-null float64
18 Radglob_MJ_m2 24694 non-null float64
19 Radrefl_MJ_m2 24694 non-null float64
20 Illum_10lux 23683 non-null float64
21 Year 51385 non-null int64
22 Month 51385 non-null int64
23 Day 51385 non-null int64
24 Hour 51385 non-null int64
dtypes: float64(20), int64(4), object(1)
memory usage: 9.8+ MB
aq = aq[['Datetime', 'SO2', 'NO2', 'O3', 'PM10', 'PM25']]
aq.rename(columns = {'Datetime':'Date'}, inplace =True)
aq.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50400 entries, 0 to 50399
Data columns (total 6 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Date 50400 non-null object
1 SO2 47034 non-null float64
2 NO2 45531 non-null float64
3 O3 46572 non-null float64
4 PM10 45919 non-null float64
5 PM25 33466 non-null float64
dtypes: float64(5), object(1)
memory usage: 2.3+ MB
asos.head()
2016-01-01 00:00:00 | -3.2 | NaN | 9.0 | 0.7 | 140.0 | 83.0 | 4.0 | -5.6 | 1015.4 | 1033.5 | NaN | NaN | 400.0 | 19.0 | -5.1 |
2016-01-01 01:00:00 | -3.9 | NaN | NaN | 0.8 | 140.0 | 85.0 | 3.9 | -6.0 | 1015.1 | 1033.2 | NaN | NaN | 300.0 | 19.0 | -5.4 |
2016-01-01 02:00:00 | -5.3 | NaN | NaN | 0.2 | 0.0 | 88.0 | 3.6 | -6.9 | 1015.1 | 1033.3 | NaN | NaN | 300.0 | 19.0 | -6.2 |
2016-01-01 03:00:00 | -4.8 | NaN | NaN | 1.1 | 140.0 | 87.0 | 3.7 | -6.6 | 1015.0 | 1033.2 | NaN | NaN | 300.0 | 19.0 | -6.2 |
2016-01-01 04:00:00 | -6.3 | NaN | NaN | 0.4 | 0.0 | 89.0 | 3.4 | -7.8 | 1015.3 | 1033.6 | NaN | NaN | 300.0 | 19.0 | -6.9 |
aaos.head()
2016-01-01 00:00:00 | 96.3 | 98.7 | 7.0 | 9.9 | 15.2 | 15.8 | -4.1 | -3.6 | -4.9 | ... | -1.1 | 6.8 | 5.65 | 10.63 | 4.98 | 578.0 | 2016 | 1 | 1 | 0 |
2016-01-01 01:00:00 | 97.2 | 99.8 | 6.5 | 10.0 | 15.2 | 15.8 | -5.0 | -4.5 | -5.9 | ... | -1.4 | 6.8 | -0.16 | 0.00 | 0.16 | 568.0 | 2016 | 1 | 1 | 1 |
2016-01-01 02:00:00 | 97.5 | 97.9 | 6.0 | 10.0 | 15.2 | 15.8 | -5.3 | -4.8 | -6.4 | ... | -1.8 | 6.7 | -0.32 | 0.02 | 0.34 | 607.0 | 2016 | 1 | 1 | 2 |
2016-01-01 03:00:00 | 97.4 | 98.7 | 5.7 | 10.1 | 15.2 | 15.8 | -5.8 | -5.5 | -6.7 | ... | -2.1 | 6.7 | -0.44 | 0.07 | 0.51 | 597.0 | 2016 | 1 | 1 | 3 |
2016-01-01 04:00:00 | 96.3 | 96.4 | 5.4 | 10.1 | 15.2 | 15.8 | -6.4 | -5.9 | -7.5 | ... | -2.5 | 6.7 | -0.55 | 0.14 | 0.69 | 579.0 | 2016 | 1 | 1 | 4 |
5 rows × 25 columns
aq.head()
2016-01-01 01:00:00 | 0.001 | 0.026 | 0.003 | 75.0 | NaN |
2016-01-01 02:00:00 | 0.001 | 0.022 | 0.003 | 81.0 | NaN |
2016-01-01 03:00:00 | 0.001 | 0.021 | 0.003 | 77.0 | NaN |
2016-01-01 04:00:00 | 0.001 | 0.019 | 0.003 | 73.0 | NaN |
2016-01-01 05:00:00 | 0.001 | 0.018 | 0.003 | 73.0 | NaN |
print(len(asos))
print(len(aaos))
print(len(aq))
52608
51385
50400
### ASOS 기준으로 AAOS 합치기. Datetime 컬럼 기준
# filename_out = "ASOSAQ_filled_" + "_" + str(site_name) + ".csv"
tmp = pd.merge(asos, aaos, how="outer") #주의 !! on='Date'를 사용하면 Date 51385 로 자료 없는 곳은 빠짐.
print(len(tmp))
tmp.head()
tmpfile='D:/dataset/my_data/mrg_asos_aaos_' + str(site_name) + '.csv'
tmp.to_csv(tmpfile, header=True, index=False)
52608
print(len(tmp))
52608
### 위에서 합친 ASOS-AAOS 기준으로 다시 AQ와 합치기 Datetime 컬럼 기준
mrg = pd.merge(tmp, aq, on='Date')
mrg.head()
2016-01-01 01:00:00 | -3.9 | NaN | NaN | 0.8 | 140.0 | 85.0 | 3.9 | -6.0 | 1015.1 | ... | 568.0 | 2016.0 | 1.0 | 1.0 | 1.0 | 0.001 | 0.026 | 0.003 | 75.0 | NaN |
2016-01-01 02:00:00 | -5.3 | NaN | NaN | 0.2 | 0.0 | 88.0 | 3.6 | -6.9 | 1015.1 | ... | 607.0 | 2016.0 | 1.0 | 1.0 | 2.0 | 0.001 | 0.022 | 0.003 | 81.0 | NaN |
2016-01-01 03:00:00 | -4.8 | NaN | NaN | 1.1 | 140.0 | 87.0 | 3.7 | -6.6 | 1015.0 | ... | 597.0 | 2016.0 | 1.0 | 1.0 | 3.0 | 0.001 | 0.021 | 0.003 | 77.0 | NaN |
2016-01-01 04:00:00 | -6.3 | NaN | NaN | 0.4 | 0.0 | 89.0 | 3.4 | -7.8 | 1015.3 | ... | 579.0 | 2016.0 | 1.0 | 1.0 | 4.0 | 0.001 | 0.019 | 0.003 | 73.0 | NaN |
2016-01-01 05:00:00 | -6.8 | NaN | NaN | 0.4 | 0.0 | 89.0 | 3.3 | -8.3 | 1014.8 | ... | 555.0 | 2016.0 | 1.0 | 1.0 | 5.0 | 0.001 | 0.018 | 0.003 | 73.0 | NaN |
5 rows × 45 columns
tmpfile='D:/dataset/my_data/mrg_asos_aaos_aq_' + str(site_name) + '.csv'
mrg.to_csv(tmpfile, header=True, index=False)
tmp2 = pd.read_csv(tmpfile)
tmp2.head()
2016-01-01 01:00:00 | -3.9 | NaN | NaN | 0.8 | 140.0 | 85.0 | 3.9 | -6.0 | 1015.1 | ... | 568.0 | 2016.0 | 1.0 | 1.0 | 1.0 | 0.001 | 0.026 | 0.003 | 75.0 | NaN |
2016-01-01 02:00:00 | -5.3 | NaN | NaN | 0.2 | 0.0 | 88.0 | 3.6 | -6.9 | 1015.1 | ... | 607.0 | 2016.0 | 1.0 | 1.0 | 2.0 | 0.001 | 0.022 | 0.003 | 81.0 | NaN |
2016-01-01 03:00:00 | -4.8 | NaN | NaN | 1.1 | 140.0 | 87.0 | 3.7 | -6.6 | 1015.0 | ... | 597.0 | 2016.0 | 1.0 | 1.0 | 3.0 | 0.001 | 0.021 | 0.003 | 77.0 | NaN |
2016-01-01 04:00:00 | -6.3 | NaN | NaN | 0.4 | 0.0 | 89.0 | 3.4 | -7.8 | 1015.3 | ... | 579.0 | 2016.0 | 1.0 | 1.0 | 4.0 | 0.001 | 0.019 | 0.003 | 73.0 | NaN |
2016-01-01 05:00:00 | -6.8 | NaN | NaN | 0.4 | 0.0 | 89.0 | 3.3 | -8.3 | 1014.8 | ... | 555.0 | 2016.0 | 1.0 | 1.0 | 5.0 | 0.001 | 0.018 | 0.003 | 73.0 | NaN |
5 rows × 45 columns