% Journal article: Kousari, Esmaeilzadeh Hosseini and Miri (2021),
% Journal of the Earth and Space Physics 47(2), pp. 315--332.
%
% Notes for maintainers:
%   - The original record had no citation key; `kousari2021infilling` was added
%     (lastnameYYYYkeyword scheme) so the entry can be parsed and cited.
%   - title_fa / abstract_fa / keywords_fa and eissn are non-standard fields;
%     standard styles ignore them. keywords_fa repeats the English keywords
%     exactly as found in the original record (no Persian keywords were given).
%   - eprint holds the direct PDF link from the original record, not an archive
%     identifier; it is kept as-is for backward compatibility.
%   - The entry contains Persian text and therefore must be read as UTF-8
%     (presumably via biber or another Unicode-aware toolchain; confirm).
@article{kousari2021infilling,
  author      = {Kousari, Mohammad Reza and Esmaeilzadeh Hosseini, Mitra Sadat and Miri, Morteza},
  title       = {Investigation of the efficiency of methods of infilling missing data in relation to the precipitation parameter in arid regions of {Iran}},
  journal     = {Journal of the Earth and Space Physics},
  volume      = {47},
  number      = {2},
  pages       = {315--332},
  year        = {2021},
  publisher   = {Institute of Geophysics, University of Tehran},
  issn        = {2538-371X},
  eissn       = {2538-3906},
  doi         = {10.22059/jesphys.2021.314958.1007269},
  abstract    = {Missing data are common issue in climate data. Also precipitation is a very important part of the hydrological cycle and meteorological and hydrological studies of watersheds, initially depend on the quantity and quality of recorded rainfall data and its distribution in the area. Complete and reliable sets of climatic and hydrological data are required for planning and design of these projects. Therefore for treatment of precipitation missing data, various methods have been developed and applied. Normal ratio method, linear regression, multivariate regression and inverse distance weighting (IDW) have a wide applications in natural resources study in our country. Therefore, it is necessary to determine the ability of these methods, especially in relation to the precipitation parameter, which plays a crucial role in the study of natural resources. In this study, the capability of each mentioned methods for infilling missing data of daily, monthly and annual precipitation time series in the arid regions of Iran was investigated for varying proportion of missing data from 5 to 50\% of total data. In fact, the main purpose of this study is to answer the question of which of the four mentioned methods are more effective for infilling precipitation missing data. The daily data of Iran’s synoptic meteorological stations were used for the present study. Using the Run homogeneity test, the data homogeneity was investigated. 
Also, using graphical exploring data, and especially boxplot diagrams, outlier data were identified and flagged as missing data. The average annual precipitation and temperature of 400 stations were determined, and then based on these data their de Martonne coefficients were computed. In the next step, stations with de Martonne coefficient less than 10 were selected as arid climate. Among them, 73 stations that had sufficient data from 1986 to 2017 were distinguished. To evaluate each of the data reconstruction methods, part of the actual data was deliberately discarded from the original data and then reconstructed. Due to high volume of calculations, this process was programmed in MATLAB software. The results showed that each method had different functionality according to the conditions. Daily data are not well estimated using the normal ratio method to estimate the missing data less than the actual one. The use of linear regression method showed that in daily time scale, unlike the normal ratio method, the model accuracy in data reconstruction is higher. For linear regression approach, the distance between the fitted line between the observed and estimated data is small at first, and as the precipitation increases, this distance increases, indicating that the model is less accurate in estimating the extreme values. Given that the fitting line is below the 1:1 line, the linear regression method estimates the actual values below normal. The same results can be found for IDW producer. The multivariate regression method is more accurate for daily time series when the proportion of missing data are not considerable, but is generally very sensitive to the proportion of missing data. The normal ratio method is not suitable for reconstructing daily missing values, however it is more stable than other methods when missing data increase. In monthly time series, the performance of the IDW method and then the normal ratio is better. 
In annual series, linear correlation, normal ratio, and IDW have better performances, respectively. The findings of this study show that in general, the accuracy of reconstructions on annual scales is more than monthly and on monthly scales is higher than daily. This is due to smoother time series in the monthly and annual time series than the daily ones. Also it should be noted that the scale of current studies is in Iran. If the data from the reserved rain-gauge stations of the Meteorological Organization and the Ministry of Energy are added to this data, the accuracy of the methods is expected to increase. As the results of the present study show, the accuracy of the models decreases with increasing incomplete data ratio. Therefore, if new data is included in missing data processing, there is an expectation of better performance of each of these methods. Finally it should be considered that each method should be used in accordance with the given conditions, and therefore it is recommended to develop a software package for infilling missing data in Iran.},
  keywords    = {Gap in data,linear regression,Normal Ratio,Infilling,Precipitation},
  title_fa    = {بررسی کارایی روش‌های بازسازی نواقص آماری در رابطه با پارامتر بارش در مناطق خشک ایران},
  abstract_fa = {نواقص آماری عاملی رایج در داده‌های اقلیمی هستند و برای تخمین آنها تا به‌حال روش‌های متنوعی توسعه یافته‌اند. در این میان، روش‌های نسبت‌نرمال، رگرسیون خطی، رگرسیون چندمتغیره و عکس مجذور فاصله یا IDW از کاربرد گسترده‌ای در مطالعات منابع طبیعی کشور ما برخوردار هستند. در پژوهش حاضر، قابلیت هر یک از روش‌های مذکور در بازسازی نواقص آماری بارش روزانه، ماهانه و سالانه مناطق خشک کشور متناسب با میزان نسبت نقص داده از 5 درصد تا 50 درصد داده‌ها موردارزیابی قرارگرفت. نتایج نشان داد که هر روش متناسب با شرایط میزان داده­های گم‌شده از عملکرد متفاوتی برخوردار است. روش رگرسیون چند متغیره هنگامی که نقص داده‌ها زیاد نباشد از دقت بیشتری در بازسازی داده‌های روزانه برخوردار است ولی در کل به میزان نسبت داده‌های گم‌شده حساس می‌باشد. 
روش نسبت‌نرمال در بازسازی نواقص بارش روزانه مناسب نیست ولی نسبت به میزان نقص داده‌ها از سایر روش‌ها پایدارتر است. در سری‌های زمانی ماهانه عملکرد IDW و سپس نسبت نرمال مناسب است. در سری‌های سالانه به‌ترتیب، روش همبستگی خطی، نسبت‌نرمال و IDW عمکرد بهتری دارند. در کل هر روش متناسب با شرایط بایستی مورد استفاده قرارگیرد و پیشنهاد می‌شود برای بازسازی نواقص آماری، یک بسته نرم‌افزاری برای کل کشور ارائه شود.},
  keywords_fa = {Gap in data,linear regression,Normal Ratio,Infilling,Precipitation},
  url         = {https://jesphys.ut.ac.ir/article_81516.html},
  eprint      = {https://jesphys.ut.ac.ir/article_81516_f8a5728be9f30a82131f59c27bec132e.pdf},
}