@misc{missmecha,
  author = {Zhou, Youran and Bouadjenek, Mohamed Reda and Aryal, Sunil},
  title  = {{MissMecha}: A Flexible {Python} Toolkit for Missing Data Mechanisms},
  year   = {2025},
  url    = {https://echoid.github.io/MissMecha/},
}
2024
ECML’24
Missing Data Imputation: Do Advanced ML/DL Techniques Outperform Traditional Approaches?
Youran Zhou, Mohamed Reda Bouadjenek, and Sunil Aryal
In Machine Learning and Knowledge Discovery in Databases. Applied Data Science Track, 2024
Missing data poses a significant challenge in real-world data analysis, prompting the development of various imputation methods. However, existing literature often overlooks two critical limitations. Firstly, many methods assume a Missing Completely At Random (MCAR) mechanism, which is relatively easy to handle but may not reflect real-world scenarios where data is often missing due to some underlying mechanisms (issues/problems) that are often unknown. This type of missing data is categorized as Missing At Random (MAR) and Missing Not At Random (MNAR). Secondly, the effectiveness of these methods is primarily assessed solely in terms of imputation accuracy using metrics such as Root Mean Square Error (RMSE), ignoring the practical utility of imputed data in downstream tasks. In this study, we comprehensively compare a broad spectrum of missing data imputation techniques, ranging from traditional statistical methods to advanced machine and deep learning approaches. Our evaluation considers their effectiveness in handling various missing mechanisms across different missing parameters. Furthermore, we assess the imputed data’s quality not only in terms of RMSE but also its impact on downstream tasks, such as classification, regression, and clustering. Contrary to common assumptions, our findings reveal that the superiority of complex deep learning-based methods is not guaranteed over simple traditional techniques. Moreover, relying solely on RMSE for evaluation can be misleading. Instead, selecting an imputation method should prioritise its effectiveness in enhancing the performance of learning algorithms in downstream tasks.
@inproceedings{10.1007/978-3-031-70381-2_7,
  author    = {Zhou, Youran and Bouadjenek, Mohamed Reda and Aryal, Sunil},
  editor    = {Bifet, Albert and Krilavi{\v{c}}ius, Tomas and Miliou, Ioanna and Nowaczyk, Slawomir},
  title     = {Missing Data Imputation: Do Advanced {ML/DL} Techniques Outperform Traditional Approaches?},
  booktitle = {Machine Learning and Knowledge Discovery in Databases. Applied Data Science Track},
  year      = {2024},
  publisher = {Springer Nature Switzerland},
  address   = {Cham},
  pages     = {100--115},
  isbn      = {978-3-031-70381-2},
  doi       = {10.1007/978-3-031-70381-2_7},
  url       = {https://link.springer.com/chapter/10.1007/978-3-031-70381-2_7},
}
ECML’24
Developing robust methods to handle missing data in real-world applications effectively
Youran Zhou, Mohamed Reda Bouadjenek, and Sunil Aryal
2024
This work was presented at the ECML PKDD 2024 PhD Forum. https://ecmlpkdd.org/2024/program-accepted-phd-forum/
@misc{zhou2025developingrobustmethodshandle,
  author    = {Zhou, Youran and Bouadjenek, Mohamed Reda and Aryal, Sunil},
  title     = {Developing robust methods to handle missing data in real-world applications effectively},
  year      = {2024},
  booktitle = {ECML PKDD 2024 PhD Forum},
  url       = {https://ecmlpkdd.org/2024/program-accepted-phd-forum/},
  note      = {This work was presented at the ECML PKDD 2024 PhD Forum.},
}
2022
MSc Thesis
Synthesizing Tabular Data Using Selectivity Enhanced Generative Adversarial Networks
@misc{zhou2025synthesizingtabulardatausing,
  author        = {Zhou, Youran and Qi, Jianzhong},
  title         = {Synthesizing Tabular Data Using Selectivity Enhanced Generative Adversarial Networks},
  year          = {2022},
  archiveprefix = {arXiv},
  note          = {Master's thesis},
}