@comment{
  Entries below come from an MTMT export; each citation key is MTMT:<unique-id>
  and each url points at the matching m2.mtmt.hu record.
  Values are stored as UTF-8, so a UTF-8-aware backend is presumably required
  (classic 8-bit BibTeX may sort or print the accented names incorrectly).
  The fields unique-id, journal-iso, eissn and orcid-numbers are not standard
  BibTeX fields; standard styles are expected to ignore them.
}

@inproceedings{MTMT:34865535,
  title         = {Is Spoken {Hungarian} Low-resource?: A Quantitative Survey of {Hungarian} Speech Data Sets},
  url           = {https://m2.mtmt.hu/api/publication/34865535},
  author        = {Mihajlik, Péter and Mády, Katalin and Kohári, Anna and Vargha, Fruzsina Sára and Kiss, Gábor and Gráczi, Tekla Etelka and Doğruöz, A. Seza},
  booktitle     = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
  unique-id     = {34865535},
  abstract      = {Even though various speech data sets are available in Hungarian, there is a lack of a general overview about their types and sizes. To fill in this gap, we provide a survey of available data sets in spoken Hungarian in five categories (e.g., monolingual, Hungarian part of multilingual, pathological, child-related and dialectal collections). In total, the estimated size of available data is about 2800 hours (across 7500 speakers) and it represents a rich spoken language diversity. However, the distribution of the data and its alignment to real-life (e.g. speech recognition) tasks is far from optimal indicating the need for additional larger-scale natural language speech data sets. Our survey presents an overview of available data sets for Hungarian explaining their strengths and weaknesses which is useful for researchers working on Hungarian across disciplines. In addition, our survey serves as a starting point towards a unified foundational speech model specific to Hungarian.},
  year          = {2024},
  pages         = {9382--9388},
  orcid-numbers = {Mihajlik, Péter/0000-0001-7532-9773; Kohári, Anna/0000-0003-2500-0149; Vargha, Fruzsina Sára/0000-0001-7204-4094; Gráczi, Tekla Etelka/0000-0003-3351-9661}
}

@inproceedings{MTMT:34855143,
  title         = {Contribution of different movement tasks to differential diagnosis of {Parkinson’s} disease},
  url           = {https://m2.mtmt.hu/api/publication/34855143},
  isbn          = {9789634219446},
  author        = {Jenei, Attila Zoltán and Sztahó, Dávid},
  booktitle     = {2nd Workshop on Intelligent Infocommunication Networks, Systems and Services},
  doi           = {10.3311/WINS2024-011},
  unique-id     = {34855143},
  year          = {2024},
  pages         = {61--66},
  orcid-numbers = {Jenei, Attila Zoltán/0000-0003-1007-9907}
}

@article{MTMT:34854968,
  title         = {A Survey on Integrating Edge Computing With {AI} and Blockchain in Maritime Domain, Aerial Systems, {IoT}, and {Industry 4.0}},
  url           = {https://m2.mtmt.hu/api/publication/34854968},
  author        = {Alnahdi, Amad and Toka, László},
  journal-iso   = {IEEE ACCESS},
  journal       = {IEEE Access},
  volume        = {12},
  unique-id     = {34854968},
  issn          = {2169-3536},
  abstract      = {In terms of digital transformation, organizations today are aware of the critical role that data and information play in their expansion and development in light of the Internet of Things. Many applications are moving from cloud computing to edge computing (EC) to increase network performance and stability. However, applications like intelligent transportation systems, smart grids, smart cities, and healthcare call for even more effective services to satisfy customers. This survey addresses extensive research on two aspects: firstly, we present the advancements of two application domains namely maritime areas and aerial systems in terms of integration with EC architecture. Secondly, we combine the most recent technologies, artificial intelligence (AI) and blockchain, into the EC paradigm by discussing several experiments conducted in various fields to demonstrate the value of utilizing them in edge computing architecture. We analyze the results of eleven experiments in each technology from 2015 to 2023.},
  keywords      = {Edge computing, Maritime domain, Aerial systems, IoT, Industry 4.0, Artificial intelligence, Blockchain},
  year          = {2024},
  eissn         = {2169-3536},
  pages         = {1--26},
  orcid-numbers = {Alnahdi, Amad/0009-0002-6026-7623; Toka, László/0000-0003-1045-9205}
}

@inproceedings{MTMT:34854938,
  title         = {Multi-speaker child speech synthesis in low-resource {Hungarian} language},
  url           = {https://m2.mtmt.hu/api/publication/34854938},
  author        = {Alwaisi, Shaimaa and Al-Radhi, Mohammed Salah and Németh, Géza},
  booktitle     = {2nd Workshop on Intelligent Infocommunication Networks, Systems and Services},
  doi           = {10.3311/WINS2024-004},
  unique-id     = {34854938},
  year          = {2024},
  pages         = {19--24}
}

@article{MTMT:34846759,
  title         = {{Industry 5.0}: Research Areas and Challenges With Artificial Intelligence and Human Acceptance},
  url           = {https://m2.mtmt.hu/api/publication/34846759},
  author        = {Dimitrakopoulos, G. and Varga, Pál and Gutt, T. and Schneider, G. and Ehm, H. and Hoess, A. and Tauber, M. and Karathanasopoulou, K. and Lackner, A. and Delsing, J.},
  doi           = {10.1109/MIE.2024.3387068},
  journal-iso   = {IEEE IND ELECTRON M},
  journal       = {IEEE Industrial Electronics Magazine},
  unique-id     = {34846759},
  issn          = {1932-4529},
  abstract      = {The industrial landscape is swiftly progressing toward Industry 5.0, marking the fifth revolution characterized by the integration of sustainable practices and digital sovereignty. This article advocates for the adoption, expansion, and implementation of artificial intelligence (AI)-enabled hardware, tools, methods, and semiconductor technologies in the journey toward Industry 5.0. Beyond the initial proposal, the article explores primary research areas and the diverse challenges inherent in this transition. Notably, significant accomplishments in pivotal industrial use cases are appended, providing validation evidence. This comprehensive approach aims to bridge academic advancements with practical industrial application, fostering a symbiotic relationship between humans and machines for increased efficiency, innovation, and adaptability.},
  keywords      = {Artificial intelligence; PRODUCTIVITY; Production; Semiconductor device manufacture; Sustainable development; Research areas; Industrial research; Industries; Industrial revolutions; Research challenges; Fourth Industrial Revolution; Sustainable practices; Hardware technology; Fifth industrial revolution; Hardware tools; Tool technology},
  year          = {2024},
  eissn         = {1941-0115},
  pages         = {2--13}
}

@article{MTMT:34841731,
  title         = {Enhancing {Parkinson's} Disease Recognition through Multimodal Analysis of {Archimedean} Spiral Drawings},
  url           = {https://m2.mtmt.hu/api/publication/34841731},
  author        = {Jenei, Attila Zoltán and Sztahó, Dávid and Valálik, István},
  doi           = {10.36244/ICJ.2024.1.8},
  journal-iso   = {INFOCOMM J},
  journal       = {Infocommunications Journal},
  volume        = {16},
  unique-id     = {34841731},
  issn          = {2061-2079},
  abstract      = {Parkinson's disease is one of the most common neurodegenerative diseases, which is incurable according to recent clinical knowledge. Evaluating motor symptoms across diverse modalities such as speech, handwriting, and movement composes a conventional diagnostic approach. However, concurrently utilizing multimodal datasets encompassing drawing and acceleration data remains an underexplored field. Our investigation involved examining drawing and movement data of 45 Parkinson's disease (PD) patients and 47 healthy individuals (HC). The PD group presented mild symptoms in the right hand. We transformed drawing data into spiral images and used visual representations of motion data, employing pre-trained models for feature extraction and classifiers. While motion representations exhibited superior performance compared to drawing images, a comprehensive evaluation with the Mann-Whitney U test at a significance level of 0.05 revealed no statistically significant difference between the efficacy of movement and drawing data in all classification scenarios. Significant improvements were made by combining the drawing data predictions with the motion data predictions. The key finding of the research is that the recognition of the disease can be improved by connecting (post-model) the two modalities. Furthermore, it can be concluded that with the present approach, neither the drawing nor the movement data produced lower results on average.},
  year          = {2024},
  eissn         = {2061-2125},
  pages         = {64--71},
  orcid-numbers = {Jenei, Attila Zoltán/0000-0003-1007-9907}
}

@article{MTMT:34841720,
  title         = {Advancements in Expressive Speech Synthesis: a Review},
  url           = {https://m2.mtmt.hu/api/publication/34841720},
  author        = {Alwaisi, Shaimaa and Németh, Géza},
  doi           = {10.36244/ICJ.2024.1.5},
  journal-iso   = {INFOCOMM J},
  journal       = {Infocommunications Journal},
  volume        = {16},
  unique-id     = {34841720},
  issn          = {2061-2079},
  abstract      = {In recent years, we have witnessed a fast and wide spread acceptance of speech synthesis technology in, leading to the transition toward a society characterized by a strong desire to incorporate these applications in their daily lives. We provide a comprehensive survey on the recent advancements in the field of expressive Text-To-Speech systems. Among different methods to represent expressivity, this paper focuses the development of expressive TTS systems, emphasizing the methodologies employed to enhance the quality and expressiveness of synthetic speech, such as style transfer and improving speaker variability. After that, we point out some of the subjective and objective metrics that are used to evaluate the quality of synthesized speech. Finally, we point out the realm of child speech synthesis, a domain that has been neglected for some time. This underscores that the field of research in children's speech synthesis is still wide open for exploration and development. Overall, this paper presents a comprehensive overview of historical and contemporary trends and future directions in speech synthesis research.},
  year          = {2024},
  eissn         = {2061-2125},
  pages         = {35--46}
}

@article{MTMT:34841474,
  title         = {Speech synthesis from intracranial stereotactic Electroencephalography using a neural vocoder},
  url           = {https://m2.mtmt.hu/api/publication/34841474},
  author        = {Arthur, Frigyes Viktor and Csapó, Tamás Gábor},
  doi           = {10.36244/ICJ.2024.1.6},
  journal-iso   = {INFOCOMM J},
  journal       = {Infocommunications Journal},
  volume        = {16},
  unique-id     = {34841474},
  issn          = {2061-2079},
  abstract      = {Speech is one of the most important human biosignals. However, only some speech production characteristics are fully understood, which are required for a successful speech based Brain-Computer Interface (BCI). A proper brain-to-speech system that can generate the speech of full sentences intelligibly and naturally poses a great challenge. In our study, we used the SingleWordProduction-Dutch-iBIDS dataset, in which speech and intracranial stereotactic electroencephalography (sEEG) signals of the brain were recorded simultaneously during a single word production task. We apply deep neural networks (FC-DNN, 2D-CNN, and 3D-CNN) on the ten speakers’ data for sEEG-to-Mel spectrogram prediction. Next, we synthesize speech using the WaveGlow neural vocoder. Our objective and subjective evaluations have shown that the DNN based approaches with neural vocoder outperform the baseline linear regression model using Griffin-Lim. The synthesized samples resemble the original speech but are still not intelligible, and the results are clearly speaker dependent. In the long term, speech-based BCI applications might be useful for the speaking impaired or those having neurological disorders.},
  year          = {2024},
  eissn         = {2061-2125},
  pages         = {47--55},
  orcid-numbers = {Csapó, Tamás Gábor/0000-0003-4375-7524}
}

@inproceedings{MTMT:34832880,
  title         = {Empowering {ISPs} with Cloud Gaming User Experience Modeling: A {NVIDIA GeForce NOW} Use-Case},
  url           = {https://m2.mtmt.hu/api/publication/34832880},
  author        = {Dobreff, Gergely and Frey, Dániel and Báder, Attila and Pašić, Alija},
  booktitle     = {2024 27th Conference on Innovation in Clouds, Internet and Networks (ICIN)},
  doi           = {10.1109/ICIN60470.2024.10494462},
  unique-id     = {34832880},
  abstract      = {Cloud gaming has emerged as a cost-effective and accessible gaming solution, with platforms like NVIDIA GeForce NOW leading the way. The rapid growth of this industry, projected to reach 6.8 billion USD by 2028, has sparked the need for enhanced user experience models to optimize cloud and network infrastructure. In our study, we conducted a comprehensive analysis of the in-game performance of the popular NVIDIA GeForce NOW cloud gaming platform under varying network conditions. Our research focused on quality of service (QoS) metrics, particularly the WebRTC logs, and their relationship with user experience, defined as in-game performance. Standardized and repeatable measurements from the GeForce NOW platform were used, where the player was asked to complete training exercises of fast-paced games under different network conditions. This paper analyses and proposes machine learning (ML) models that estimate the user experience of cloud gaming. The models are trained on the low-level network- and application-related QoS metrics extracted from WebRTC logs. Our contribution demonstrates that ML models can accurately estimate in-game performance from QoS parameters, highlighting network latency's greater impact on the player's gaming experience than packet loss, bandwidth, and jitter. With our novel model, internet service providers (ISPs) can effectively estimate user experience using only network-related metrics, enabling network optimization and enhancing gaming services. This research deepens our understanding of cloud gaming user experience and offers insights for refining cloud gaming services. © 2024 IEEE.},
  keywords      = {PERFORMANCE; Data Collection; Cost effectiveness; Quality of service; Cost effective; User experience; Machine learning models; Quality-of-service; Internet service providers; Quality of service metrics; Network condition; Users' experiences; Cloud gamings; Cloud gaming; user experience modeling; User experience model},
  year          = {2024},
  pages         = {202--209},
  orcid-numbers = {Pašić, Alija/0000-0001-6346-496X}
}

@comment{
  NOTE(review), entry MTMT:34804256: the volume field holds a special-issue
  title rather than a volume number. The real volume is not known from this
  file; confirm it, then move the special-issue text to note or issuetitle.
}

@article{MTMT:34804256,
  title         = {Single and Combined Algorithms for Open Set Classification on Image Datasets},
  url           = {https://m2.mtmt.hu/api/publication/34804256},
  author        = {Al-Shouha, Modafar Mohammad Mahmood and Szűcs, Gábor},
  doi           = {10.14232/actacyb.298356},
  journal-iso   = {ACTA CYBERN-SZEGED},
  journal       = {Acta Cybernetica},
  volume        = {Special Issue of the 13th Conference of PhD Students in Computer Science},
  unique-id     = {34804256},
  issn          = {0324-721X},
  abstract      = {Generally, classification models have closed nature, and they are constrained by the number of classes in the training data. Hence, classifying "unknown" - OOD (out-of-distribution) - samples is challenging, especially in the so called "open set" problem. We propose and investigate different solutions - single and combined algorithms - to tackle this task, where we use and expand a K-classifier to be able to identify K+1 classes. They do not require any retraining or modification on the K-classifier architecture. We show their strengths when avoiding type I or type II errors is fundamental. We also present a mathematical representation for the task to estimate the K+1 classification accuracy, and an inequality that defines its boundaries. Additionally, we introduce a formula to calculate the exact K+1 classification accuracy.},
  year          = {2024},
  eissn         = {2676-993X},
  pages         = {1--26},
  orcid-numbers = {Al-Shouha, Modafar Mohammad Mahmood/0000-0003-2051-4036; Szűcs, Gábor/0000-0002-5781-1088}
}