728x90
ASOS_pre0_allsite_with_figs_OK.py
ASOS 전처리 Step 1
1) 기상데이터 포털에서 각 연도별 OBS_ASOS_TIM_XXXX.csv 다운로드
2) 각 사이트별로 변수 그림 그리고, 값이 존재하는 사용 가능한 변수명 확인
In [1]:
import keras
print(keras.__version__)
import tensorflow as tf
print(tf.__version__)
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
from __future__ import absolute_import, division, print_function, unicode_literals
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
import pandas as pd
from download import download
import matplotlib.pyplot as plt
import seaborn as sns
mpl.rcParams['figure.figsize'] = (8,6)
mpl.rcParams['axes.grid'] = False
2.7.0
2.7.0
(1) OBS_ASOS_TIM_comb.csv 파일 읽기
변수명 영문으로 변환
In [24]:
# Locations of the combined ASOS hourly observation file.
# dir_out is defined here but not used within this cell.
dir_in = "D:/dataset/ASOS"
dir_out = "D:/dataset/my_data"
filename_in = "OBS_ASOS_TIM_comb.csv"

# Full path of the input file; printed so the resolved path can be checked.
infile = os.path.join(dir_in, filename_in)
print(infile)

# low_memory=False is required because the file mixes data types.
# The file is read as UTF8; encoding='cp949' is the alternative that was
# tried for this data when decoding raised an error.
din = pd.read_csv(
    infile,
    encoding='UTF8',
    low_memory=False,
)
D:/dataset/ASOS\OBS_ASOS_TIM_comb.csv
In [18]:
# Preview the first rows of the loaded table to check that it parsed sensibly.
din.head()
Out[18]:
Site_NoSiteDateTair_CTair_flagRain_mmRain_flagWS_m_sWS_flagWD_16deg...Cloud_BH_100mVis_10mSfc_statPhenTsfc_CTsfc_flagT5cm_CT10cm_CT20cm_CT30cm_C01234
90 | 속초 | 2015-01-01 0:00 | NaN | NaN | NaN | NaN | 3.5 | 0.0 | 290.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
90 | 속초 | 2015-01-01 1:00 | -5.0 | 0.0 | NaN | NaN | 3.9 | 0.0 | 270.0 | ... | NaN | NaN | NaN | NaN | -3.4 | 0.0 | NaN | NaN | NaN | NaN |
90 | 속초 | 2015-01-01 2:00 | -5.6 | 0.0 | NaN | NaN | 2.7 | 0.0 | 320.0 | ... | NaN | NaN | NaN | NaN | -3.9 | 0.0 | NaN | NaN | NaN | NaN |
90 | 속초 | 2015-01-01 3:00 | -6.2 | 0.0 | NaN | NaN | 2.1 | 0.0 | 270.0 | ... | 8.0 | 2000.0 | 0.0 | NaN | -4.3 | 0.0 | NaN | NaN | NaN | NaN |
90 | 속초 | 2015-01-01 4:00 | -6.5 | 0.0 | NaN | NaN | 1.7 | 0.0 | 230.0 | ... | 10.0 | 2000.0 | NaN | NaN | -4.7 | 0.0 | NaN | NaN | NaN | NaN |
5 rows × 38 columns
In [25]:
## Rename the columns to English.
# The assignment is positional: the order of this list must match the
# column order of the loaded file, and the lengths must be equal.
english_column_names = [
    'Site_No', 'Site', 'Date',
    'Tair_C', 'Tair_flag',
    'Rain_mm', 'Rain_flag',
    'WS_m_s', 'WS_flag',
    'WD_16deg', 'WD_flag',
    'RH_pct', 'RH_flag',
    'Pv_hPa', 'Td_C',
    'Pa_hPa', 'Pa_flag',
    'Psfc_hPa', 'Psfc_flag',
    'Suntime_hr', 'Suntime_flag',
    'Sunrad_MJ_m2', 'Sunrad_flag',
    'Snow_cm', 'Snow_3hr_cm',
    'Cloud_cover_total', 'Cloud_cover_mid_low', 'Cloud_type', 'Cloud_BH_100m',
    'Vis_10m', 'Sfc_stat', 'Phen',
    'Tsfc_C', 'Tsfc_flag',
    'T5cm_C', 'T10cm_C', 'T20cm_C', 'T30cm_C',
]
din.columns = english_column_names

# Show the dtype pandas inferred for every column.
print(din.dtypes)
Site_No int64
Site object
Date object
Tair_C float64
Tair_flag float64
Rain_mm float64
Rain_flag float64
WS_m_s float64
WS_flag float64
WD_16deg float64
WD_flag float64
RH_pct float64
RH_flag float64
Pv_hPa float64
Td_C float64
Pa_hPa float64
Pa_flag float64
Psfc_hPa float64
Psfc_flag float64
Suntime_hr float64
Suntime_flag float64
Sunrad_MJ_m2 float64
Sunrad_flag float64
Snow_cm float64
Snow_3hr_cm float64
Cloud_cover_total float64
Cloud_cover_mid_low float64
Cloud_type object
Cloud_BH_100m float64
Vis_10m float64
Sfc_stat float64
Phen float64
Tsfc_C float64
Tsfc_flag float64
T5cm_C float64
T10cm_C float64
T20cm_C float64
T30cm_C float64
dtype: object
In [26]:
# List the distinct station numbers, then the distinct station names.
for id_column in ('Site_No', 'Site'):
    print(din[id_column].unique())
[ 90 95 98 99 100 101 102 104 105 106 108 112 114 115 116 119 121 127
129 130 131 133 135 136 137 138 140 143 146 152 155 156 159 162 165 168
169 170 172 174 175 176 177 184 185 187 188 189 192 201 202 203 211 212
216 217 221 226 232 235 236 238 243 244 245 247 248 251 252 253 254 255
257 258 259 260 261 262 263 264 266 268 271 272 273 276 277 278 279 281
283 284 285 288 289 294 295 93 239]
['속초' '철원' '동두천' '파주' '대관령' '춘천' '백령도' '북강릉' '강릉' '동해' '서울' '인천' '원주'
'울릉도' '관악산' '수원' '영월' '충주' '서산' '울진' '청주' '대전' '추풍령' '안동' '상주' '포항' '군산'
'대구' '전주' '울산' '창원' '광주' '부산' '통영' '목포' '여수' '흑산도' '완도' '고창' '순천'
'진도(첨찰산)' '대구(기)' '홍성' '제주' '고산' '성산' '서귀포' '진주' '강화' '양평' '이천' '인제' '홍천'
'태백' '정선군' '제천' '보은' '천안' '보령' '부여' '금산' '부안' '임실' '정읍' '남원' '장수' '고창군'
'영광군' '김해시' '순창군' '북창원' '양산시' '보성군' '강진군' '장흥' '해남' '고흥' '의령군' '함양군'
'광양시' '진도군' '봉화' '영주' '문경' '청송군' '영덕' '의성' '구미' '영천' '경주시' '거창' '합천' '밀양'
'산청' '거제' '남해' '북춘천' '세종']
In [27]:
# Station name recorded on every row whose station number is 93.
din.loc[din['Site_No'] == 93, 'Site']
Out[27]:
828319 북춘천
828320 북춘천
828321 북춘천
828322 북춘천
828323 북춘천
...
4996669 북춘천
4996670 북춘천
4996671 북춘천
4996672 북춘천
4996673 북춘천
Name: Site, Length: 46031, dtype: object
(2) 각 사이트별 관측 변수 그림 그리고, 사용가능한 변수 체크
In [28]:
# Plot every observed variable of one station in a grid of panels, to see
# which variables actually contain values for that station.
# SITE (a station number) must already be defined before this cell runs.
subsite = din.loc[din['Site_No'] == SITE]
ncol, nrow = 4, 7
plt.figure(figsize=(20,30))

# (grid slot, column to plot, panel title, extra plot keyword arguments)
# Slot 21 is left empty on purpose: the Cloud_type panel is disabled.
panels = [
    (1, 'Tair_C', "Tair", {}),
    (2, 'Td_C', "Td", {}),
    (3, 'Tsfc_C', "Tsfc", {}),
    (4, 'T5cm_C', "T5cm", {}),
    (5, 'T10cm_C', "T10cm", {}),
    (6, 'T20cm_C', "T20cm", {}),
    (7, 'T30cm_C', "T30cm", {}),
    (8, 'RH_pct', "RH", {}),
    (9, 'WS_m_s', "WS", {}),
    (10, 'WD_16deg', "WD", {}),
    (11, 'Rain_mm', "Rain", {}),
    (12, 'Snow_cm', "Snow", {}),
    (13, 'Snow_3hr_cm', "Snow_3hr", {}),
    (14, 'Pa_hPa', "Pa", {}),
    (15, 'Pv_hPa', "Pv", {}),
    (16, 'Psfc_hPa', "Psfc", {}),
    (17, 'Sunrad_MJ_m2', "Sunrad", {"color": "red"}),
    (18, 'Suntime_hr', "Suntime", {}),
    (19, 'Cloud_cover_total', "Cloud_cover", {}),
    (20, 'Cloud_cover_mid_low', "Cloud_ML", {}),
    (22, 'Cloud_BH_100m', "Cloud_BH", {}),
    (23, 'Sfc_stat', "Sfc", {}),
    (24, 'Phen', "Phenomenon", {}),
    (25, 'Vis_10m', "Vis", {}),
]
for slot, column, title, line_kwargs in panels:
    plt.subplot(nrow, ncol, slot)
    plt.plot(subsite[column], **line_kwargs)
    plt.title(title)

# Name(s) of the plotted station; subsite holds exactly the rows of SITE.
print(subsite['Site'].unique())
['안동']
In [ ]:
728x90