728x90
반응형

ASOS_AAOS_AQ_pre_mrg.py

 

특정 사이트 자료 ASOS AAOS AQ 모두 합치기

In [1]:
import keras
print(keras.__version__)
import tensorflow as tf
print(tf.__version__)
import os
#os.environ["CUDA_VISIBLE_DEVICES"] = "1"  # for GPU_1

from __future__ import absolute_import, division, print_function, unicode_literals
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
import pandas as pd
from download import download

import matplotlib.pyplot as plt
import seaborn as sns
2.7.0
2.7.0
In [2]:
# Input tables for one station: ASOS and AAOS observations plus air quality (AQ).
file_asos, file_aaos, file_aq = (
    'D:/dataset/my_data/ASOS_merg_2016-2021_136_Andong.csv',
    'D:/dataset/my_data/AAOS_merg_2016-2021_972_Ahndong.csv',
    'D:/dataset/my_data/AQ_merge_2016-2021.csv',
)

# Author's note: ASOS fails to load as cp949, and AAOS loaded as cp949 ends up
# with a Date format that does not match ASOS, so both are pinned to UTF-8.
asos, aaos = (pd.read_csv(path, encoding='UTF8') for path in (file_asos, file_aaos))
aq = pd.read_csv(file_aq, encoding='cp949')
# If a read fails with a decoding error, try swapping UTF8 and cp949.
In [3]:
# Preview the first rows of the ASOS table as loaded, before any columns are dropped.
asos.head()
Out[3]:
Site_NoSiteDateTair_CTair_flagRain_mmRain_flagWS_m_sWS_flagWD_16deg...Tsfc_CTsfc_flagT5cm_CT10cm_CT20cm_CT30cm_CYearMonthDayHour01234
136 안동 2016-01-01 00:00:00 -3.2 0.0 NaN 9.0 0.7 0.0 140.0 ... -5.1 0.0 NaN NaN NaN NaN 2016 1 1 0
136 안동 2016-01-01 01:00:00 -3.9 0.0 NaN NaN 0.8 0.0 140.0 ... -5.4 0.0 NaN NaN NaN NaN 2016 1 1 1
136 안동 2016-01-01 02:00:00 -5.3 0.0 NaN NaN 0.2 0.0 0.0 ... -6.2 0.0 NaN NaN NaN NaN 2016 1 1 2
136 안동 2016-01-01 03:00:00 -4.8 0.0 NaN NaN 1.1 0.0 140.0 ... -6.2 0.0 NaN NaN NaN NaN 2016 1 1 3
136 안동 2016-01-01 04:00:00 -6.3 0.0 NaN NaN 0.4 0.0 0.0 ... -6.9 0.0 NaN NaN NaN NaN 2016 1 1 4

5 rows × 42 columns

In [4]:
# Korean station name (as stored in the 'Site' column) -> ASCII label used in
# the output file names.  Add an entry here before processing another station.
SITE_NAME_EN = {'안동': 'Andong'}

# Read the station name by position so the lookup does not depend on the index
# containing the label 1.  The input is expected to hold a single station.
site_kr = asos['Site'].iloc[0]
if site_kr not in SITE_NAME_EN:
    # Previously an unknown station left site_name undefined and only failed
    # later with a NameError; report the real cause immediately instead.
    raise ValueError("No English name registered for site {!r}".format(site_kr))
site_name = SITE_NAME_EN[site_kr]
site_name
Out[4]:
'Andong'
In [5]:
# Columns not carried into the merged table: station identifiers, QC flags
# (Rain_flag is kept), snow / cloud / ground-state fields, sub-surface
# temperatures, and the split date parts.
asos_unused = [
    'Site_No', 'Site', 'Tair_flag', 'WS_flag', 'WD_flag', 'RH_flag',
    'Pa_flag', 'Psfc_flag', 'Suntime_flag', 'Sunrad_flag',
    'Snow_cm', 'Snow_3hr_cm', 'Sfc_stat', 'Tsfc_flag',
    'Cloud_cover_total', 'Cloud_cover_mid_low', 'Cloud_type', 'Cloud_BH_100m',
    'T5cm_C', 'T10cm_C', 'T20cm_C', 'T30cm_C',
    'Year', 'Month', 'Day', 'Hour',
]
asos.drop(columns=asos_unused, inplace=True)

# Show the remaining columns with their non-null counts.
asos.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 52608 entries, 0 to 52607
Data columns (total 16 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Date          52608 non-null  object 
 1   Tair_C        52606 non-null  float64
 2   Rain_mm       4373 non-null   float64
 3   Rain_flag     10012 non-null  float64
 4   WS_m_s        52608 non-null  float64
 5   WD_16deg      52608 non-null  float64
 6   RH_pct        52608 non-null  float64
 7   Pv_hPa        52604 non-null  float64
 8   Td_C          52602 non-null  float64
 9   Pa_hPa        52604 non-null  float64
 10  Psfc_hPa      52605 non-null  float64
 11  Suntime_hr    28720 non-null  float64
 12  Sunrad_MJ_m2  28756 non-null  float64
 13  Vis_10m       52606 non-null  float64
 14  Phen          15943 non-null  float64
 15  Tsfc_C        52599 non-null  float64
dtypes: float64(15), object(1)
memory usage: 6.4+ MB
In [6]:
# Columns not carried into the merged table: station identifiers, six of the
# soil-temperature depths, and the water level.
aaos_unused = [
    'Site_No', 'Site',
    'Tsoil_20cm', 'Tsoil_30cm', 'Tsoil_0.5m', 'Tsoil_1.5m', 'Tsoil_3.0m', 'Tsoil_5.0m',
    'Watrlev_cm',
]
aaos.drop(columns=aaos_unused, inplace=True)

# Show the remaining columns with their non-null counts.
aaos.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51385 entries, 0 to 51384
Data columns (total 25 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Date           51385 non-null  object 
 1   RH_0.5m        24687 non-null  float64
 2   RH_1.5m        51376 non-null  float64
 3   Msoil_10cm     50466 non-null  float64
 4   Msoil_20cm     50466 non-null  float64
 5   Msoil_30cm     50466 non-null  float64
 6   Msoil_50cm     50466 non-null  float64
 7   Tair_0.5m      24692 non-null  float64
 8   Tair_1.5m      51378 non-null  float64
 9   Tair_4.0m      24691 non-null  float64
 10  WS_1.5m        24510 non-null  float64
 11  WS_4.0m        24510 non-null  float64
 12  Tsfc_0m        51384 non-null  float64
 13  Tgrass_0m      50413 non-null  float64
 14  Tsoil_5cm      51385 non-null  float64
 15  Tsoil_10cm     51371 non-null  float64
 16  Tsoil_1.0m     51383 non-null  float64
 17  Radnet_MJ_m2   24694 non-null  float64
 18  Radglob_MJ_m2  24694 non-null  float64
 19  Radrefl_MJ_m2  24694 non-null  float64
 20  Illum_10lux    23683 non-null  float64
 21  Year           51385 non-null  int64  
 22  Month          51385 non-null  int64  
 23  Day            51385 non-null  int64  
 24  Hour           51385 non-null  int64  
dtypes: float64(20), int64(4), object(1)
memory usage: 9.8+ MB
In [7]:
# Keep the timestamp and the five pollutant columns, and name the timestamp
# 'Date' so it matches the key column of the ASOS/AAOS tables.
aq = aq[['Datetime', 'SO2', 'NO2', 'O3', 'PM10', 'PM25']].rename(
    columns={'Datetime': 'Date'}
)
aq.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50400 entries, 0 to 50399
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Date    50400 non-null  object 
 1   SO2     47034 non-null  float64
 2   NO2     45531 non-null  float64
 3   O3      46572 non-null  float64
 4   PM10    45919 non-null  float64
 5   PM25    33466 non-null  float64
dtypes: float64(5), object(1)
memory usage: 2.3+ MB
In [8]:
# Preview the ASOS table after the unused columns were dropped.
asos.head()
Out[8]:
DateTair_CRain_mmRain_flagWS_m_sWD_16degRH_pctPv_hPaTd_CPa_hPaPsfc_hPaSuntime_hrSunrad_MJ_m2Vis_10mPhenTsfc_C01234
2016-01-01 00:00:00 -3.2 NaN 9.0 0.7 140.0 83.0 4.0 -5.6 1015.4 1033.5 NaN NaN 400.0 19.0 -5.1
2016-01-01 01:00:00 -3.9 NaN NaN 0.8 140.0 85.0 3.9 -6.0 1015.1 1033.2 NaN NaN 300.0 19.0 -5.4
2016-01-01 02:00:00 -5.3 NaN NaN 0.2 0.0 88.0 3.6 -6.9 1015.1 1033.3 NaN NaN 300.0 19.0 -6.2
2016-01-01 03:00:00 -4.8 NaN NaN 1.1 140.0 87.0 3.7 -6.6 1015.0 1033.2 NaN NaN 300.0 19.0 -6.2
2016-01-01 04:00:00 -6.3 NaN NaN 0.4 0.0 89.0 3.4 -7.8 1015.3 1033.6 NaN NaN 300.0 19.0 -6.9
In [9]:
# Preview the AAOS table after the unused columns were dropped.
aaos.head()
Out[9]:
DateRH_0.5mRH_1.5mMsoil_10cmMsoil_20cmMsoil_30cmMsoil_50cmTair_0.5mTair_1.5mTair_4.0m...Tsoil_10cmTsoil_1.0mRadnet_MJ_m2Radglob_MJ_m2Radrefl_MJ_m2Illum_10luxYearMonthDayHour01234
2016-01-01 00:00:00 96.3 98.7 7.0 9.9 15.2 15.8 -4.1 -3.6 -4.9 ... -1.1 6.8 5.65 10.63 4.98 578.0 2016 1 1 0
2016-01-01 01:00:00 97.2 99.8 6.5 10.0 15.2 15.8 -5.0 -4.5 -5.9 ... -1.4 6.8 -0.16 0.00 0.16 568.0 2016 1 1 1
2016-01-01 02:00:00 97.5 97.9 6.0 10.0 15.2 15.8 -5.3 -4.8 -6.4 ... -1.8 6.7 -0.32 0.02 0.34 607.0 2016 1 1 2
2016-01-01 03:00:00 97.4 98.7 5.7 10.1 15.2 15.8 -5.8 -5.5 -6.7 ... -2.1 6.7 -0.44 0.07 0.51 597.0 2016 1 1 3
2016-01-01 04:00:00 96.3 96.4 5.4 10.1 15.2 15.8 -6.4 -5.9 -7.5 ... -2.5 6.7 -0.55 0.14 0.69 579.0 2016 1 1 4

5 rows × 25 columns

In [10]:
# Preview the AQ table after column selection and the Datetime -> Date rename.
aq.head()
Out[10]:
DateSO2NO2O3PM10PM2501234
2016-01-01 01:00:00 0.001 0.026 0.003 75.0 NaN
2016-01-01 02:00:00 0.001 0.022 0.003 81.0 NaN
2016-01-01 03:00:00 0.001 0.021 0.003 77.0 NaN
2016-01-01 04:00:00 0.001 0.019 0.003 73.0 NaN
2016-01-01 05:00:00 0.001 0.018 0.003 73.0 NaN
In [11]:
# Row counts of the three inputs, in the order ASOS, AAOS, AQ.
for frame in (asos, aaos, aq):
    print(len(frame))
52608
51385
50400
In [ ]:
 
In [12]:
### Merge AAOS onto ASOS by timestamp.
# After the column drops the two frames share only the 'Date' column; name it
# explicitly instead of letting pandas infer the key from whichever column
# names happen to overlap.
# how="outer" keeps every timestamp found in either frame.  The default
# how="inner" keeps only timestamps present in both, which drops the hours
# missing from AAOS (52608 -> 51385 rows).  It is the join type, not
# on='Date', that decides whether those rows survive.
tmp = pd.merge(asos, aaos, on='Date', how="outer")
print(len(tmp))

# Save the intermediate ASOS+AAOS table, tagged with the station name.
tmpfile = 'D:/dataset/my_data/mrg_asos_aaos_' + str(site_name) + '.csv'
tmp.to_csv(tmpfile, header=True, index=False)
52608
In [13]:
# Row count of the merged ASOS+AAOS table.
print(len(tmp))
52608
In [14]:
### 위에서 합친 ASOS-AAOS 기준으로 다시 AQ와 합치기  Datetime 컬럼 기준
mrg = pd.merge(tmp, aq, on='Date')
mrg.head()
Out[14]:
DateTair_CRain_mmRain_flagWS_m_sWD_16degRH_pctPv_hPaTd_CPa_hPa...Illum_10luxYearMonthDayHourSO2NO2O3PM10PM2501234
2016-01-01 01:00:00 -3.9 NaN NaN 0.8 140.0 85.0 3.9 -6.0 1015.1 ... 568.0 2016.0 1.0 1.0 1.0 0.001 0.026 0.003 75.0 NaN
2016-01-01 02:00:00 -5.3 NaN NaN 0.2 0.0 88.0 3.6 -6.9 1015.1 ... 607.0 2016.0 1.0 1.0 2.0 0.001 0.022 0.003 81.0 NaN
2016-01-01 03:00:00 -4.8 NaN NaN 1.1 140.0 87.0 3.7 -6.6 1015.0 ... 597.0 2016.0 1.0 1.0 3.0 0.001 0.021 0.003 77.0 NaN
2016-01-01 04:00:00 -6.3 NaN NaN 0.4 0.0 89.0 3.4 -7.8 1015.3 ... 579.0 2016.0 1.0 1.0 4.0 0.001 0.019 0.003 73.0 NaN
2016-01-01 05:00:00 -6.8 NaN NaN 0.4 0.0 89.0 3.3 -8.3 1014.8 ... 555.0 2016.0 1.0 1.0 5.0 0.001 0.018 0.003 73.0 NaN

5 rows × 45 columns

In [16]:
# Write the final ASOS+AAOS+AQ table, tagged with the station name.
tmpfile = f'D:/dataset/my_data/mrg_asos_aaos_aq_{site_name}.csv'
mrg.to_csv(tmpfile, index=False, header=True)
In [17]:
# Read the file that was just written back in and preview it as a check.
tmp2 = pd.read_csv(tmpfile)
tmp2.head()
Out[17]:
DateTair_CRain_mmRain_flagWS_m_sWD_16degRH_pctPv_hPaTd_CPa_hPa...Illum_10luxYearMonthDayHourSO2NO2O3PM10PM2501234
2016-01-01 01:00:00 -3.9 NaN NaN 0.8 140.0 85.0 3.9 -6.0 1015.1 ... 568.0 2016.0 1.0 1.0 1.0 0.001 0.026 0.003 75.0 NaN
2016-01-01 02:00:00 -5.3 NaN NaN 0.2 0.0 88.0 3.6 -6.9 1015.1 ... 607.0 2016.0 1.0 1.0 2.0 0.001 0.022 0.003 81.0 NaN
2016-01-01 03:00:00 -4.8 NaN NaN 1.1 140.0 87.0 3.7 -6.6 1015.0 ... 597.0 2016.0 1.0 1.0 3.0 0.001 0.021 0.003 77.0 NaN
2016-01-01 04:00:00 -6.3 NaN NaN 0.4 0.0 89.0 3.4 -7.8 1015.3 ... 579.0 2016.0 1.0 1.0 4.0 0.001 0.019 0.003 73.0 NaN
2016-01-01 05:00:00 -6.8 NaN NaN 0.4 0.0 89.0 3.3 -8.3 1014.8 ... 555.0 2016.0 1.0 1.0 5.0 0.001 0.018 0.003 73.0 NaN

5 rows × 45 columns

728x90
반응형
728x90
반응형

ASOS_pre0_allsite_with_figs_OK.py

ASOS 전처리 Step 1

1) 기상데이터 포털에서 각 연도별 OBS_ASOS_TIM_XXXX.csv 다운로드

2) 각 사이트별로 변수 그림 그리고, 값이 존재하는 사용 가능한 변수명 확인

In [1]:
import keras
print(keras.__version__)
import tensorflow as tf
print(tf.__version__)
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

from __future__ import absolute_import, division, print_function, unicode_literals
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
import pandas as pd
from download import download

import matplotlib.pyplot as plt
import seaborn as sns


mpl.rcParams['figure.figsize'] = (8,6)
mpl.rcParams['axes.grid'] = False
2.7.0
2.7.0

(1) OBS_ASOS_TIM_comb.csv 파일 읽기

변수명 영문으로 변환

In [24]:
# Input folder with the combined hourly ASOS export, and the output folder.
dir_in, dir_out = "D:/dataset/ASOS", "D:/dataset/my_data"
filename_in = "OBS_ASOS_TIM_comb.csv"

infile = os.path.join(dir_in, filename_in)
print(infile)

# Author's notes: low_memory=False is needed because the column dtypes are
# mixed; if one of 'cp949' / 'UTF8' fails to decode the file, try the other.
din = pd.read_csv(infile, low_memory=False, encoding='UTF8')
D:/dataset/ASOS\OBS_ASOS_TIM_comb.csv
In [18]:
# Preview the first rows of the combined ASOS table.
din.head()
Out[18]:
Site_NoSiteDateTair_CTair_flagRain_mmRain_flagWS_m_sWS_flagWD_16deg...Cloud_BH_100mVis_10mSfc_statPhenTsfc_CTsfc_flagT5cm_CT10cm_CT20cm_CT30cm_C01234
90 속초 2015-01-01 0:00 NaN NaN NaN NaN 3.5 0.0 290.0 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
90 속초 2015-01-01 1:00 -5.0 0.0 NaN NaN 3.9 0.0 270.0 ... NaN NaN NaN NaN -3.4 0.0 NaN NaN NaN NaN
90 속초 2015-01-01 2:00 -5.6 0.0 NaN NaN 2.7 0.0 320.0 ... NaN NaN NaN NaN -3.9 0.0 NaN NaN NaN NaN
90 속초 2015-01-01 3:00 -6.2 0.0 NaN NaN 2.1 0.0 270.0 ... 8.0 2000.0 0.0 NaN -4.3 0.0 NaN NaN NaN NaN
90 속초 2015-01-01 4:00 -6.5 0.0 NaN NaN 1.7 0.0 230.0 ... 10.0 2000.0 NaN NaN -4.7 0.0 NaN NaN NaN NaN

5 rows × 38 columns

In [25]:
## 컬럼명 영문으로 변경
## Rename the columns to English.
# The names are assigned by position, so the list must contain one entry per
# column of the CSV, in file order.
english_columns = [
    'Site_No', 'Site', 'Date', 'Tair_C', 'Tair_flag', 'Rain_mm',
    'Rain_flag', 'WS_m_s', 'WS_flag', 'WD_16deg', 'WD_flag', 'RH_pct',
    'RH_flag', 'Pv_hPa', 'Td_C', 'Pa_hPa', 'Pa_flag', 'Psfc_hPa',
    'Psfc_flag', 'Suntime_hr', 'Suntime_flag', 'Sunrad_MJ_m2',
    'Sunrad_flag', 'Snow_cm', 'Snow_3hr_cm', 'Cloud_cover_total',
    'Cloud_cover_mid_low', 'Cloud_type', 'Cloud_BH_100m', 'Vis_10m',
    'Sfc_stat', 'Phen', 'Tsfc_C', 'Tsfc_flag', 'T5cm_C', 'T10cm_C',
    'T20cm_C', 'T30cm_C',
]
din.columns = english_columns
print(din.dtypes)
Site_No                  int64
Site                    object
Date                    object
Tair_C                 float64
Tair_flag              float64
Rain_mm                float64
Rain_flag              float64
WS_m_s                 float64
WS_flag                float64
WD_16deg               float64
WD_flag                float64
RH_pct                 float64
RH_flag                float64
Pv_hPa                 float64
Td_C                   float64
Pa_hPa                 float64
Pa_flag                float64
Psfc_hPa               float64
Psfc_flag              float64
Suntime_hr             float64
Suntime_flag           float64
Sunrad_MJ_m2           float64
Sunrad_flag            float64
Snow_cm                float64
Snow_3hr_cm            float64
Cloud_cover_total      float64
Cloud_cover_mid_low    float64
Cloud_type              object
Cloud_BH_100m          float64
Vis_10m                float64
Sfc_stat               float64
Phen                   float64
Tsfc_C                 float64
Tsfc_flag              float64
T5cm_C                 float64
T10cm_C                float64
T20cm_C                float64
T30cm_C                float64
dtype: object
In [26]:
# List every distinct station number, then every distinct station name.
for key in ('Site_No', 'Site'):
    print(din[key].unique())
[ 90  95  98  99 100 101 102 104 105 106 108 112 114 115 116 119 121 127
 129 130 131 133 135 136 137 138 140 143 146 152 155 156 159 162 165 168
 169 170 172 174 175 176 177 184 185 187 188 189 192 201 202 203 211 212
 216 217 221 226 232 235 236 238 243 244 245 247 248 251 252 253 254 255
 257 258 259 260 261 262 263 264 266 268 271 272 273 276 277 278 279 281
 283 284 285 288 289 294 295  93 239]
['속초' '철원' '동두천' '파주' '대관령' '춘천' '백령도' '북강릉' '강릉' '동해' '서울' '인천' '원주'
 '울릉도' '관악산' '수원' '영월' '충주' '서산' '울진' '청주' '대전' '추풍령' '안동' '상주' '포항' '군산'
 '대구' '전주' '울산' '창원' '광주' '부산' '통영' '목포' '여수' '흑산도' '완도' '고창' '순천'
 '진도(첨찰산)' '대구(기)' '홍성' '제주' '고산' '성산' '서귀포' '진주' '강화' '양평' '이천' '인제' '홍천'
 '태백' '정선군' '제천' '보은' '천안' '보령' '부여' '금산' '부안' '임실' '정읍' '남원' '장수' '고창군'
 '영광군' '김해시' '순창군' '북창원' '양산시' '보성군' '강진군' '장흥' '해남' '고흥' '의령군' '함양군'
 '광양시' '진도군' '봉화' '영주' '문경' '청송군' '영덕' '의성' '구미' '영천' '경주시' '거창' '합천' '밀양'
 '산청' '거제' '남해' '북춘천' '세종']
In [27]:
# Station name recorded on every row whose station number is 93.
din.loc[din['Site_No'] == 93, 'Site']
Out[27]:
828319     북춘천
828320     북춘천
828321     북춘천
828322     북춘천
828323     북춘천
          ... 
4996669    북춘천
4996670    북춘천
4996671    북춘천
4996672    북춘천
4996673    북춘천
Name: Site, Length: 46031, dtype: object

(2) 각 사이트별 관측 변수 그림 그리고, 사용가능한 변수 체크

In [28]:
# Station to inspect.  SITE was never assigned in the published cells, so this
# cell failed with a NameError when run on its own.  136 is Andong (안동), the
# station whose name this cell printed; change the number to plot another one.
SITE = 136

subsite = din.loc[din['Site_No'] == SITE]

# One panel per observed variable so that columns holding no usable data can
# be spotted by eye.  Each entry is (subplot slot, column, title, plot kwargs).
# Slot 21 stays empty: 'Cloud_type' is an object column and is not plotted.
ncol=4; nrow=7
panels = [
    (1, 'Tair_C', "Tair", {}),
    (2, 'Td_C', "Td", {}),
    (3, 'Tsfc_C', "Tsfc", {}),
    (4, 'T5cm_C', "T5cm", {}),
    (5, 'T10cm_C', "T10cm", {}),
    (6, 'T20cm_C', "T20cm", {}),
    (7, 'T30cm_C', "T30cm", {}),
    (8, 'RH_pct', "RH", {}),
    (9, 'WS_m_s', "WS", {}),
    (10, 'WD_16deg', "WD", {}),
    (11, 'Rain_mm', "Rain", {}),
    (12, 'Snow_cm', "Snow", {}),
    (13, 'Snow_3hr_cm', "Snow_3hr", {}),
    (14, 'Pa_hPa', "Pa", {}),
    (15, 'Pv_hPa', "Pv", {}),
    (16, 'Psfc_hPa', "Psfc", {}),
    (17, 'Sunrad_MJ_m2', "Sunrad", {'color': "red"}),
    (18, 'Suntime_hr', "Suntime", {}),
    (19, 'Cloud_cover_total', "Cloud_cover", {}),
    (20, 'Cloud_cover_mid_low', "Cloud_ML", {}),
    (22, 'Cloud_BH_100m', "Cloud_BH", {}),
    (23, 'Sfc_stat', "Sfc", {}),
    (24, 'Phen', "Phenomenon", {}),
    (25, 'Vis_10m', "Vis", {}),
]

plt.figure(figsize=(20,30))
for slot, column, title, style in panels:
    plt.subplot(nrow, ncol, slot)
    plt.plot(subsite[column], **style)
    plt.title(title)

# Print the station name so the figure can be matched to its site.
print(subsite['Site'].unique())
['안동']
In [ ]:
 
728x90
반응형
728x90
반응형
print(len(df))

# Hourly timestamps from 2016-01-01 00:00 to 2022-01-01 00:00.  date_range
# includes both end points, so one is subtracted to leave out the final
# 2022-01-01 00:00 stamp when counting the hours of 2016-2021.
date_data = pd.date_range('2016-01-01', '2022-01-01', freq='H')
dat = date_data.to_list()
n_hours = len(dat) - 1
print(n_hours)

52608

52608

 

# Outer join: every timestamp found in either frame is kept.
tmp = pd.merge(asos, aaos, how="outer")  
print(len(tmp))

52608

how="outer" 이므로 두 자료 중 한쪽에만 있는 시각도 모두 유지되어, 앞에서 만든 1시간 간격 날짜 데이터(52608개)와 같은 길이로 merge 됨.

 

# No `how` is given, so pandas uses its default inner join: only timestamps
# present in both frames remain.
tmp = pd.merge(asos, aaos, on="Date")  
print(len(tmp))

51385

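 #주의 !! how 를 지정하지 않고 on='Date' 만 사용하면 기본값인 how='inner' 가 적용되어, 두 자료 모두에 있는 시각(51385개)만 남고 자료 없는 곳은 생략된 채 merge 됨. 모든 시각을 유지하려면 on='Date', how='outer' 를 함께 지정한다.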

 

 

728x90
반응형
728x90
반응형

1차 데이터셋:  qc_ASOS 데이터 (2-3시간 차이)

2차 데이터셋:  mrg_ASOS_AAOS 데이터 (2-3시간 차이)  x에 vis_log 가 없는 경우 y=flag

3차 데이터셋:  mrg_ASOS_AAOS 데이터 (2-3시간 차이)  x에 vis_log 를 추가하고 y=flag

 

데이터셋 주의 사항

vis_10m는 제외해야 된다.

(오로지 flag로만 비교할때 보다, vis_log가 남아 있을 때 결과가 약간 더 향상되는 경우가 있다. )

 

 

의사결정 나무

                 |   1차          2차 > 3차

---------------------------

 class     |   1.0

 precision |   0.50

 recall    |   0.53

 f1-score  |   0.52

2차

              precision    recall  f1-score   support

         0.0       0.99      0.99      0.99     10227
         1.0       0.60      0.66      0.63       294

    accuracy                           0.98     10521
   macro avg       0.79      0.82      0.81     10521
weighted avg       0.98      0.98      0.98     10521

 

3차

           precision    recall  f1-score   support

         0.0       0.99      0.99      0.99     10227
         1.0       0.59      0.62      0.61       294

    accuracy                           0.98     10521
   macro avg       0.79      0.81      0.80     10521
weighted avg       0.98      0.98      0.98     10521

랜덤포레스트

                 |   1차     2차 < 3차 

---------------------------

 class     |   1.0

 precision |   1.00

 recall    |   0.09

 f1-score  |   0.16

 

2차 

              precision    recall  f1-score   support

         0.0       0.97      1.00      0.99     10227
         1.0       1.00      0.07      0.13       294

    accuracy                           0.97     10521
   macro avg       0.99      0.53      0.56     10521
weighted avg       0.97      0.97      0.96     10521

3차

  precision    recall  f1-score   support

         0.0       0.98      1.00      0.99     10227
         1.0       0.87      0.41      0.56       294

    accuracy                           0.98     10521
   macro avg       0.92      0.71      0.78     10521
weighted avg       0.98      0.98      0.98     10521

 

나이브 베이즈

                 |   1차     2차 < 3차 

---------------------------

 class     |   1.0

 precision |   1.0

 recall    |   0.09

 f1-score  |   0.16

 

2차

            precision    recall  f1-score   support

         0.0       0.97      1.00      0.99     10227
         1.0       1.00      0.07      0.13       294

    accuracy                           0.97     10521
   macro avg       0.99      0.53      0.56     10521
weighted avg       0.97      0.97      0.96     10521

3차

    precision    recall  f1-score   support

         0.0       0.98      1.00      0.99     10227
         1.0       0.87      0.41      0.56       294

    accuracy                           0.98     10521
   macro avg       0.92      0.71      0.78     10521
weighted avg       0.98      0.98      0.98     10521

에이다 부스트

                 |   1차        2차 < 3차 

---------------------------

 class     |   1.0

 precision |   0.75

 recall    |   0.54

 f1-score  |   0.63

2차

            precision    recall  f1-score   support

         0.0       0.99      0.99      0.99     10227
         1.0       0.75      0.57      0.65       294

    accuracy                           0.98     10521
   macro avg       0.87      0.78      0.82     10521
weighted avg       0.98      0.98      0.98     10521

3차

     precision    recall  f1-score   support

         0.0       0.99      0.99      0.99     10227
         1.0       0.76      0.59      0.66       294

    accuracy                           0.98     10521
   macro avg       0.87      0.79      0.83     10521
weighted avg       0.98      0.98      0.98     10521

 

그레디언트 부스트 (시간 걸림)

                 |   1차        2차 > 3차

---------------------------

 class     |   1.0

 precision |   0.38

 recall    |   0.03

 f1-score  |   0.06

2차

           precision    recall  f1-score   support

         0.0       0.98      1.00      0.99     10227
         1.0       0.78      0.33      0.47       294

    accuracy                           0.98     10521
   macro avg       0.88      0.67      0.73     10521
weighted avg       0.98      0.98      0.97     10521

3차

         precision    recall  f1-score   support

         0.0       0.98      1.00      0.99     10227
         1.0       0.80      0.32      0.46       294

    accuracy                           0.98     10521
   macro avg       0.89      0.66      0.72     10521
weighted avg       0.98      0.98      0.97     10521

 

스태킹

                 |   1차       2차 = 3차.

---------------------------

 class     |   1.0

 precision |   0.70

 recall    |   0.56

 f1-score  |   0.62

 

2차

             precision    recall  f1-score   support

         0.0       0.99      0.99      0.99     10227
         1.0       0.75      0.55      0.64       294

    accuracy                           0.98     10521
   macro avg       0.87      0.77      0.81     10521
weighted avg       0.98      0.98      0.98     10521

3차

            precision    recall  f1-score   support

         0.0       0.99      1.00      0.99     10227
         1.0       0.76      0.55      0.64       294

    accuracy                           0.98     10521
   macro avg       0.88      0.77      0.81     10521
weighted avg       0.98      0.98      0.98     10521

 

728x90
반응형
728x90
반응형

모델링 평가자료

1차 데이터셋:  qc_ASOS 데이터 (2-3시간 차이)

2차 데이터셋:  mrg_ASOS_AAOS 데이터 (1-2시간 차이) 오직 y=flag

3차 데이터셋:  mrg_ASOS_AAOS 데이터 (1-2시간 차이)  x에 vis_log 를 추가하고 y=flag

 

데이터셋 주의 사항

vis_10m는 제외해야 된다.

오로지 flag로만 비교할때 보다, vis_log가 남아 있을 때 결과가 약간 더 향상되는 경우가 있다. 

 

 

코드 예제 - 의사결정 나무

# Decision tree
from sklearn import tree
from sklearn.metrics import classification_report

## Train on the training split
clf_tree = tree.DecisionTreeClassifier(random_state=0).fit(X_train_std, y_train)

## Predict on the test split
pred_tree = clf_tree.predict(X_test_std)
print(pred_tree)

## Evaluate the model scores
get_clf_eval(y_test, pred_tree)

## Print the classification report
class_report = classification_report(y_test, pred_tree)
print(class_report)

 

분류 리포트 (2차 결과) 

의사결정 나무

                 |   1차         2차 > 3차 

---------------------------

 class     |   1.0

 precision |   0.50

 recall    |   0.53

 f1-score  |   0.52

2차

              precision    recall  f1-score   support

         0.0       0.99      0.99      0.99     10198
         1.0       0.81      0.81      0.81       324

    accuracy                           0.99     10522
   macro avg       0.90      0.90      0.90     10522
weighted avg       0.99      0.99      0.99     10522

3차

  precision    recall  f1-score   support

         0.0       0.99      0.99      0.99     10198
         1.0       0.81      0.80      0.80       324

    accuracy                           0.99     10522
   macro avg       0.90      0.90      0.90     10522
weighted avg       0.99      0.99      0.99     10522

랜덤포레스트

                 |   1차     2차 < 3차 

---------------------------

 class     |   1.0

 precision |   1.00

 recall    |   0.09

 f1-score  |   0.16

2차

             precision    recall  f1-score   support

         0.0       0.99      1.00      1.00     10198
         1.0       1.00      0.70      0.82       324

    accuracy                           0.99     10522
   macro avg       1.00      0.85      0.91     10522
weighted avg       0.99      0.99      0.99     10522

3차

              precision    recall  f1-score   support

         0.0       0.99      1.00      1.00     10198
         1.0       1.00      0.74      0.85       324

    accuracy                           0.99     10522
   macro avg       0.99      0.87      0.92     10522
weighted avg       0.99      0.99      0.99     10522

 

나이브 베이즈

                 |   1차     2차 < 3차 

---------------------------

 class     |   1.0

 precision |   1.0

 recall    |   0.09

 f1-score  |   0.16

2차

       precision    recall  f1-score   support

         0.0       0.99      1.00      1.00     10198
         1.0       1.00      0.70      0.82       324

    accuracy                           0.99     10522
   macro avg       1.00      0.85      0.91     10522
weighted avg       0.99      0.99      0.99     10522

3차

              precision    recall  f1-score   support

         0.0       0.99      1.00      1.00     10198
         1.0       1.00      0.74      0.85       324

    accuracy                           0.99     10522
   macro avg       0.99      0.87      0.92     10522
weighted avg       0.99      0.99      0.99     10522

에이다 부스트

                 |   1차        2차 < 3차 

---------------------------

 class     |   1.0

 precision |   0.75

 recall    |   0.54

 f1-score  |   0.63

2차

           precision    recall  f1-score   support

         0.0       0.99      1.00      1.00     10198
         1.0       0.93      0.79      0.85       324

    accuracy                           0.99     10522
   macro avg       0.96      0.89      0.93     10522
weighted avg       0.99      0.99      0.99     10522

3차

             precision    recall  f1-score   support

         0.0       0.99      1.00      1.00     10198
         1.0       0.93      0.82      0.87       324

    accuracy                           0.99     10522
   macro avg       0.96      0.91      0.93     10522
weighted avg       0.99      0.99      0.99     10522

그레디언트 부스트 (시간 걸림)

                 |   1차        2차 > 3차

---------------------------

 class     |   1.0

 precision |   0.38

 recall    |   0.03

 f1-score  |   0.06

2차

             precision    recall  f1-score   support

         0.0       0.99      1.00      1.00     10198
         1.0       0.92      0.83      0.87       324

    accuracy                           0.99     10522
   macro avg       0.96      0.91      0.94     10522
weighted avg       0.99      0.99      0.99     10522

3차

             precision    recall  f1-score   support

         0.0       0.99      1.00      1.00     10198
         1.0       0.92      0.81      0.86       324

    accuracy                           0.99     10522
   macro avg       0.96      0.90      0.93     10522
weighted avg       0.99      0.99      0.99     10522

 

스태킹

                 |   1차       2차 = 3차.

---------------------------

 class     |   1.0

 precision |   0.70

 recall    |   0.56

 f1-score  |   0.62

2차

             precision    recall  f1-score   support

         0.0       0.99      1.00      1.00     10198
         1.0       0.96      0.77      0.85       324

    accuracy                           0.99     10522
   macro avg       0.98      0.88      0.92     10522
weighted avg       0.99      0.99      0.99     10522

3차

             precision    recall  f1-score   support

         0.0       0.99      1.00      1.00     10198
         1.0       0.96      0.77      0.85       324

    accuracy                           0.99     10522
   macro avg       0.98      0.88      0.92     10522
weighted avg       0.99      0.99      0.99     10522

 

728x90
반응형
728x90
반응형
  1. Park, C., S.-H. Lee*, Comparison of Machine Learning Model Performance based on Observation Methods using Naked-eye and Visibility-meter, J. Korean Earth Sci. Soc., 2023, 44(2), 105-118. 머신러닝을 이용한 안개 예측 시 목측과 시정계 계측 방법에 따른 모델 성능 차이 비교
  2. Park, C., Y.-T. Lee, S.-H. Lee*, Characteristics of atmospheric CO2 fluxes and the estimation of their potential sources around Boseong Standard Weather Observatory (BSWO), Atmos. Environ, 2021, 252, 118340 https://www.sciencedirect.com/science/article/abs/pii/S1352231021001588
  3. Kang, D. C. Park, W.-S. Jung, K.O. Lee and H.W. Lee, Effects of meteorology and air pollution on respiratory disease affecting vulnerable populations, J. Environ. Sci. Int., 2020, 29(6), 579-603. (in Korean with English abstract) 기상 및 대기질이 건강취약집단에 미치는 영향 https://koreascience.kr/article/JAKO202022449681263.page
  4. Park, C.*, S.-Y. Park, K.R. Gurney, C. Gerbig, J. P. DiGangi, Y. Choi, and H.W. Lee, Numerical simulation of atmospheric CO2 concentration and flux over the Korean Peninsula using WRF-VPRM model during Korus-AQ 2016 campaign, 2020, PloS ONE 15(1): e0228106. https://doi.org/10.1371/journal.pone.0228106
  5. Yoo, J.-W, W. Jeon, S.-Y. Park, C. Park, J. Jung, S.-H. Lee, H. W. Lee*, Investigating the regional difference of aerosol feedback effects over South Korea using the WRF-CMAQ two-way coupled modeling system, Atmos. Environ., 2019, 218(1), 116968. https://www.sciencedirect.com/science/article/abs/pii/S1352231019306077
  6. Park, S.-Y, C. Park, J.-W. You, S.-H. Lee and H. W. Lee*, Adjoint sensitivity of inland ozone to its precursors and meteorological and chemical influences, Atmos. Environ., 2018, 192, 104-115 https://pubs.acs.org/doi/full/10.1021/es052135g
  7. Park, C.*, C. Gerbig, S. Newman, R. Ahmadov, S. Feng, K.R. Gurney, G. R. Carmichael, S.-Y. Park, H.-W. Lee, M. Goulden, J. Stutz, J. Peischl, and T. Ryerson, CO2 transport, variability, and budget over the southern California air basin using the high-resolution WRF-VPRM model during the CalNex 2010 campaign, J. Appl. Meteoro. Climatol., 2018, 57(6), 1558-8424. https://journals.ametsoc.org/view/journals/apme/57/6/jamc-d-17-0358.1.xml
  8. Park, C.*, K.O. Lee and W.S. Jung, Comparison of aircraft observations and simulation results of atmospheric CO2 over coastal basin areas, J. Environ. Sci. Int., 2017, 26(6), 1-8. (in Korean with English abstract) https://koreascience.kr/article/JAKO201719950757916.page
  9. Park, C.* and H.W. Lee, Simulation of atmospheric CO2 over coastal basin areas using meteorology-vegetation model, J. Environ. Sci. Int., 2017, 26(6), 9-18. (in Korean with English abstract) http://journal.kenss.or.kr/journal/article.php?code=52796&list.php?m=1&keyword=Synthesis 
  10. Park, C., and G.W. Schade*, Anthropogenic and biogenic features of long-term measured CO2 flux in north downtown Houston, Texas. J. Environ. Qual., 2016, 45, 253-265. https://acsess.onlinelibrary.wiley.com/doi/full/10.2134/jeq2015.02.0115
  11. Park, C., G.W. Schade*, N.D. Werner, D.J. Sailor, and C.H. Kim, Comparative estimates of anthropogenic heat emission in relation to surface energy balance of a subtropical urban neighborhood. Atmos. Environ., 2016, 126, 182-191. https://www.sciencedirect.com/science/article/pii/S1352231015305458
  12. Kim, D.H., H.W. Lee, S.Y. Park, J.W. Yu, C. Park, and J.H. Park, Correction of surface characteristics to diagnostic wind modeling and its impact on potentials of wind power density, J. Renewable Sustainable Energy, 2014, 6, 042012 (published on Aug. 19, 2014) https://pubs.aip.org/aip/jrse/article/6/4/042012/1017536/Correction-of-surface-characteristics-to
  13. Jeon, W.-B., S.-H. Lee, H.W. Lee, C. Park, D.-H. Kim and S.-Y. Park, A study on high ozone formation mechanism associated with change of NOx/VOCs ratio at a rural area in the Korean Peninsula, Atmos. Environ., 2014, 89, 10-21 (published Feb. 6, 2014) https://www.sciencedirect.com/science/article/pii/S1352231014001010
  14. Kota, S.H., Q. Ying, C. Park, and G.W. Schade, Estimation of VOC emission factors from flux measurements using a receptor model and footprint analysis. Atmos. Environ., 2014, 82, 24-35. https://www.sciencedirect.com/science/article/pii/S1352231013007401
  15. Ji, H.E., S.H. Lee, C. Park and H.W. Lee, A case study on sea breeze circulation and ozone concentration due to the effect of cold water in the southeastern coastal area of Korea, J. Environ. Sci. Int., 2014, 23(3), 261-274 (in Korean with English abstract) 한국 남동연안의 냉수대 영향에 의한 해풍순환과 오존농도의 사례연구 http://www.koreascience.or.kr/article/JAKO201408439036524.page
  16. Lee, K.O., M.J. Kim, K.Y. Lee, D.B. Kang, C. Park, H.W. Lee and W.S. Jung, Analysis of building energy using meteorological numerical simulation data over Busan metropolitan areas, J. Environ. Sci. Int., 2014, 23(3), 503-510 (in Korean with English abstract) http://journal.kenss.or.kr/journal/article.php?code=1777 
  17. Lee, K.O., K.Y. Lee, D.B. Kang, C. Park, and W.S. Jung, A study of the application of small wind power system using meteorological simulation data in Busan, J. Environ. Sci. Int., 2014, 23(6), 1085-1093 (in Korean with English abstract) 기상수치모의 자료를 이용한 부산지역의 소형풍력발전 시스템 적용에 관한 연구 https://koreascience.kr/article/JAKO201418964310320.page
  18. Worton, D.R., J.D. Surratt, B.W. LaFranchi, A.W.H. Chan, Y. Zhao, R.J. Weber, J.-H. Park, J.B. Gilman, J. deGouw, C. Park, G. Schade, M. Beaver, J.M. St. Clair, J. Crounse, P. Wennberg, G.M. Wolfe, S. Harrold, J.A. Thornton, D.K. Farmer, K.S. Docherty, M.J. Cubison, J-L. Jimenez, A.A. Frossard, L.M. Russell, K. Kristensen, M. Glasius, J. Mao, X. Ren, W. Brune, E.C. Browne, S.E. Pusede, R.C. Cohen, J.H. Seinfeld, and A.H. Goldstein, Observational insights into aerosol formation from isoprene, Environ. Sci. Technol. 2013, 47, 11403−11413 (published Sep. 4, 2013) https://pubs.acs.org/doi/10.1021/es4011064
  19. Mao, J.*, X. Ren, L. Zhang, D.M. VanDuin, R.C. Cohen, J.-H. Park, A.H. Goldstein, F. Paulot, M.R. Beaver, J.D. Crounse, P.O. Wennberg, J.P. DiGangi, S.B. Henry, F.N. Keutsch, C. Park, G.W.  Schade, G.M. Wolfe, J.A. Thornton, and W.H. Brune, Insights into hydroxyl measurements and atmospheric oxidation in a California forest, Atmos. Chem. Phys., 2012, 12, 8009-8020, doi:10.5194/acp-12-8009-2012 (published in Sep. 7, 2012) https://acp.copernicus.org/articles/12/8009/2012/acp-12-8009-2012.html
  20. Zhang, H., D.R. Worton, M. Lewandowski, J. Ortega, C.L. Rubitschun, J.H. Park, K. Kristensen, P. Campuzano-Jost, D. Day, J.L. Jimenez, J. Mohammed, J.H. Offenberg, T.E. Kleindienst, J. Gilman, W.C. Kuster, J. deGouw, C. Park, G.W. Schade, A.A. Frossard, L.M. Russell, L. Kaser, W. Jud, A. Hansel, L. Cappellin, T. Karl, M. Glasius, A. Guenther, A.H. Goldstein, J.H. Seinfeld, A. Gold, R.M. Kamens, and J.D. Surratt*, Organosulfate as Tracers for Secondary Organic Aerosol (SOA) Formation from 2-Methyl-3-Buten-2-ol (MBO) in the Atmosphere, Environ. Sci. Technol., 2012, 46(17), pp 9437-9446, doi:10.1021/es301648z (published in July 31, 2012) https://pubs.acs.org/doi/10.1021/es301648z
  21. Beaver, M.R.*, J.M. St.Clair, F. Paulot, K.M. Spencer, J.D. Crounse, B.W. LaFranchi, K.E. Min, S.E. Pusede, P.J. Wooldridge, G.W. Schade, C. Park, R.C. Cohen, and P.O. Wennberg,  Importance of biogenic precursors to the budget of organic nitrates: observations of multifunctional organic nitrates by CIMS and TD-LIF during BEARPEX 2009, Atmos. Chem. Phys., 2012, 12, 5773-5785, doi:10.5194/acp-12-5773-2012 (published in Jan. 4, 2012) https://acp.copernicus.org/articles/12/5773/2012/
  22. Park, C., G.W. Schade*, and I. Boedeker, Characteristic of the flux of isoprene and its oxidation products in an urban area. J. Geophys. Res., 2011, 116(D21), doi:10.1029/2011JD015856 (published in Nov. 16, 2011) https://agupubs.onlinelibrary.wiley.com/doi/full/10.1029/2011JD015856
  23. Schade G.W.*, S. Khan, C. Park, and I. Boedeker, Rural southeast Texas air quality measurements during the 2006 Texas Air Quality Study. J. Air & Waste Manage. Assoc., 2011, 61(10), pp. 1070-1081. doi:10.1080/10473289.2011.608621 (published in Sep. 27, 2011) https://www.tandfonline.com/doi/full/10.1080/10473289.2011.608621
  24. Park, C., G.W. Schade*, and I. Boedeker, Flux measurement of volatile organic compounds by the relaxed eddy accumulation method combined with a GC-FID system north of downtown Houston, Texas, Atmos. Environ. 2010, 44(21-22), pp. 2605-2614, doi:10.1016/j.atmosenv.2010.04.016 (published in July, 2010) https://www.sciencedirect.com/science/article/pii/S1352231010002980

 

 

728x90
반응형

728x90
반응형
! Temperature conversion: version using an INTERFACE block (p. 222)
!
! Repeatedly prompts for a Celsius temperature, converts it to Fahrenheit
! with the external function Celsius_to_Fahr, prints both values, and asks
! whether to continue. Any answer other than "Y" or "y" ends the loop.
PROGRAM Temperature_Conversion_7
    implicit none

    ! Explicit interface for the external function Celsius_to_Fahr, which is
    ! defined outside this program unit.
    INTERFACE
        Function Celsius_to_Fahr(Temp)
        real :: Celsius_to_Fahr
        real, intent(in) :: Temp
        END Function Celsius_to_Fahr
    END INTERFACE

    real :: fahrenheit, celsius
    character(1) :: response
    integer :: ios              ! I/O status of the final "press Enter" read

    DO
        ! Get a Celsius temperature
        write (*, '(1x, A)', ADVANCE = "NO") "Enter a Celsius temperature:"
        read *, celsius

        ! Use the external function Celsius_to_Fahr to convert it to Fahrenheit
        fahrenheit = Celsius_to_Fahr(celsius)

        ! Output the result
        print '(1x, 2(F6.2, A))', celsius, &
            " in Celsius is equivalent to ", fahrenheit, " in Fahrenheit"

        ! Check if more temperatures are to be converted
        write (*, '(/ 1x, A)', ADVANCE = "NO") &
               "More temperatures to convert (Y or N)?"
        read *, response

        ! Accept the answer in either case; anything else leaves the loop
        IF (response /= "Y" .AND. response /= "y") EXIT
    END DO

    ! PAUSE was deleted from the language in Fortran 95. Wait for Enter
    ! instead so a console window stays open; IOSTAT keeps an end-of-file on
    ! standard input from aborting the program.
    write (*, '(1x, A)', ADVANCE = "NO") "Press Enter to exit..."
    read (*, '(A)', IOSTAT = ios) response
END PROGRAM Temperature_Conversion_7

    
! Convert a temperature from degrees Celsius to degrees Fahrenheit.
!
!   Temp             (in)  temperature in degrees Celsius
!   Celsius_to_Fahr  (out) the same temperature in degrees Fahrenheit,
!                          computed as F = 1.8 * C + 32
Function Celsius_to_Fahr(Temp)
    implicit none
    real :: Celsius_to_Fahr
    real, intent(in) :: Temp
    ! The previous expression, (Temp - 32.0) / 1.8, was the inverse
    ! (Fahrenheit -> Celsius) conversion and contradicted the function name.
    Celsius_to_Fahr = Temp * 1.8 + 32.0
End Function Celsius_to_Fahr

 

 

 

 

https://aeir.tistory.com/entry/%ED%8F%AC%ED%8A%B8%EB%9E%80-%EA%B0%95%EC%A2%8C-%EC%84%9C%EB%B8%8C%EB%A3%A8%ED%8B%B4-%EB%B6%80%ED%94%84%EB%A1%9C%EA%B7%B8%EB%9E%A8?category=940076 

 

포트란 강좌 :: 서브루틴 부프로그램

보호되어 있는 글입니다. 내용을 보시려면 비밀번호를 입력하세요.

aeir.tistory.com

 

728x90
반응형

+ Recent posts