% Publication records that appear to be exported from the MTMT database
% (entry keys and url fields point at m2.mtmt.hu publication ids).
%
% Encoding: the entries contain raw UTF-8 (accented Hungarian names, degree
% signs, en dashes), so process them with a UTF-8 aware backend such as Biber.
%
% Non-standard fields kept from the export: journal-iso (abbreviated journal
% name), unique-id (record id, same number as in the key), eissn and
% orcid-numbers. Fields unknown to a bibliography style are ignored by it.
%
% Conventions used below: page ranges use a double hyphen, percent signs in
% abstracts are escaped, journal names are stored in normal title case, and
% acronyms / proper nouns in titles are brace-protected against recasing.

@article{MTMT:34694205,
  author        = {Lakatos, Róbert and Pollner, Péter and Hajdu, András and Joó, Tamás},
  title         = {A multimodal deep learning architecture for smoking detection with a small data approach},
  journal       = {Frontiers in Artificial Intelligence},
  journal-iso   = {FRONTI ARTIF INTELL},
  volume        = {7},
  year          = {2024},
  doi           = {10.3389/frai.2024.1326050},
  url           = {https://m2.mtmt.hu/api/publication/34694205},
  eissn         = {2624-8212},
  unique-id     = {34694205},
  orcid-numbers = {Pollner, Péter/0000-0003-0464-4893; Joó, Tamás/0000-0002-3551-6125},
  abstract      = {Covert tobacco advertisements often raise regulatory measures. This paper presents that artificial intelligence, particularly deep learning, has great potential for detecting hidden advertising and allows unbiased, reproducible, and fair quantification of tobacco-related media content. We propose an integrated text and image processing model based on deep learning, generative methods, and human reinforcement, which can detect smoking cases in both textual and visual formats, even with little available training data. Our model can achieve 74\% accuracy for images and 98\% for text. Furthermore, our system integrates the possibility of expert intervention in the form of human reinforcement. Using the pre-trained multimodal, image, and text processing models available through deep learning makes it possible to detect smoking in different media even with few training data.},
}

@article{MTMT:34676546,
  author        = {Lakatos, Róbert and Bogacsovics, Gergő and Harangi, Balázs and Lakatos, István and Tiba, Attila and Tóth, János and Szabó, Marianna and Hajdu, András},
  title         = {A Machine Learning-Based Pipeline for the Extraction of Insights from Customer Reviews},
  journal       = {Big Data and Cognitive Computing},
  journal-iso   = {BIG DATA COGN COMPUT},
  volume        = {8},
  pages         = {1--24},
  year          = {2024},
  doi           = {10.3390/bdcc8030020},
  url           = {https://m2.mtmt.hu/api/publication/34676546},
  eissn         = {2504-2289},
  unique-id     = {34676546},
  orcid-numbers = {Harangi, Balázs/0000-0003-4405-2040},
  abstract      = {The efficiency of natural language processing has improved dramatically with the advent of machine learning models, particularly neural network-based solutions. However, some tasks are still challenging, especially when considering specific domains. This paper presents a model that can extract insights from customer reviews using machine learning methods integrated into a pipeline. For topic modeling, our composite model uses transformer-based neural networks designed for natural language processing, vector-embedding-based keyword extraction, and clustering. The elements of our model have been integrated and tailored to better meet the requirements of efficient information extraction and topic modeling of the extracted information for opinion mining. Our approach was validated and compared with other state-of-the-art methods using publicly available benchmark datasets. The results show that our system performs better than existing topic modeling and keyword extraction methods in this task.},
}

@inproceedings{MTMT:34560319,
  author        = {Bouali, Kassem Anis and Hajdu, András},
  title         = {Real-Time Birds Shadow Detection for Autonomous {UAVs}},
  booktitle     = {Artificial Intelligence: Towards Sustainable Intelligence},
  pages         = {169--177},
  year          = {2023},
  doi           = {10.1007/978-3-031-47997-7_13},
  url           = {https://m2.mtmt.hu/api/publication/34560319},
  unique-id     = {34560319},
}

@article{MTMT:34529262,
  author        = {Pándy, Árpád and Kovács, László and Hajdu, András},
  title         = {Steering Angle Prediction From a Camera Image as a Backup Service},
  journal       = {{IEEE} Sensors Letters},
  journal-iso   = {IEEE SENSORS LETTERS},
  volume        = {7},
  pages         = {1--4},
  year          = {2023},
  doi           = {10.1109/LSENS.2023.3326105},
  url           = {https://m2.mtmt.hu/api/publication/34529262},
  issn          = {2475-1472},
  unique-id     = {34529262},
  orcid-numbers = {Pándy, Árpád/0000-0001-8787-7486},
}

@inproceedings{MTMT:34523011,
  author        = {Pándy, Árpád and Harangi, Balázs and Hajdu, András},
  title         = {Extracting Drug Names from Medical Reports},
  booktitle     = {2023 {IEEE} 18th International Conference on Computer Science and Information Technologies ({CSIT})},
  pages         = {1--4},
  year          = {2023},
  doi           = {10.1109/CSIT61576.2023.10324071},
  url           = {https://m2.mtmt.hu/api/publication/34523011},
  unique-id     = {34523011},
  orcid-numbers = {Pándy, Árpád/0000-0001-8787-7486; Harangi, Balázs/0000-0003-4405-2040},
}

% NOTE(review): issn and eissn carry the same value in the record below, as
% exported; confirm the electronic ISSN against the journal before relying on it.
@article{MTMT:34417887,
  author        = {Gombos, Béla and Nagy, Zoltán and Hajdu, András and Nagy, János},
  title         = {Climate change in the {Debrecen} area in the last 50 years and its impact on maize production},
  journal       = {Időjárás / Quarterly Journal of the Hungarian Meteorological Service},
  journal-iso   = {IDŐJÁRÁS},
  volume        = {127},
  pages         = {485--504},
  year          = {2023},
  doi           = {10.28974/idojaras.2023.4.5},
  url           = {https://m2.mtmt.hu/api/publication/34417887},
  issn          = {0324-6329},
  eissn         = {0324-6329},
  unique-id     = {34417887},
  abstract      = {The average yield of maize is significantly dependent on the meteorological conditions of the growing year. Both the most favorable weather conditions and the weather anomalies that tend to cause damage depend on the given phenophase. The aim of this research is to analyze the climatic changes that are important in maize production in the Hajdúság region. For the climatological study of the area, homogenized temperature and precipitation data from the Hungarian Meteorological Service was used for the Debrecen region, which are freely available for download from the data repository of the institution. Trend analysis was performed for the last 50-year (1973–2022) and 30-year (1993–2022) periods. In total, 40 meteorological data series matching the study objective were analyzed. Linear regression calculations were performed using the SPSS 27 statistical software. For the non-parametric procedure, the MAKESENS Excel application was used, based on the Mann-Kendall (MK) test and Sen's slope estimation. This research shows that the choice of the length of the study period affects the results of trend analysis. The numerical values of the trend slope for the 30-year vs. 50-year period differ, and for some parameters there are also substantial differences (e.g., trend sign). The results of the parametric and non-parametric trend analyses differed only marginally for the temperature variables included. Also, for precipitation data that do not follow a normal distribution (e.g., monthly), there were only a few significant differences. The trend in mean annual temperature shows an increase of 0.39 and 0.52 °C in 10 years, and an increase of around 2 °C in 50 years and 1.5 °C in 30 years. There is a significant warming in both the summer and winter half-years, with the summer half-year showing a steeper upward trend in the 50-year data series and the winter half-year in the 30-year data series. There is a clear pattern of large, highly significant warming in the summer months and less significant changes in the two spring and two autumn months that were observed. A negative, non-significant trend in annual precipitation is observed. The decreases of 17 mm and 24 mm/10 years obtained for the 50- and 30-year time series are not negligible from a practical point of view. For the summer half-year, the precipitation amount is decreasing, with a slope of -27 mm/10 years for the last 30 years, but even this value is not significant due to the high variability. There is no significant change in the amount of precipitation in the winter half-year over the last decades. Significant trends cannot be detected from monthly or even semi-annual or annual precipitation data. The Mann-Kendall test showed a trend decrease only in the 30-year April data series at the p=0.1 significance level. Overall, the changes are negative for maize production. It should be highlighted that the obvious warming, combined with a slight decrease in precipitation, is leading to a deterioration in crop water availability and a reduction in crop yields. The impact of the identified adverse climatic changes can be compensated to a significant extent by the proposed agrotechnical responses.},
}

@article{MTMT:34395482,
  author        = {Harangi, Balázs and Baran, Ágnes and Beregi-Kovács, Marcell and Hajdu, András},
  title         = {Composing Diverse Ensembles of Convolutional Neural Networks by Penalization},
  journal       = {Mathematics},
  journal-iso   = {MATHEMATICS-BASEL},
  volume        = {11},
  pages         = {4730},
  year          = {2023},
  doi           = {10.3390/math11234730},
  url           = {https://m2.mtmt.hu/api/publication/34395482},
  eissn         = {2227-7390},
  unique-id     = {34395482},
  orcid-numbers = {Harangi, Balázs/0000-0003-4405-2040},
  abstract      = {Ensemble-based systems are well known to have the capacity to outperform individual approaches if the ensemble members are sufficiently accurate and diverse. This paper investigates how an efficient ensemble of deep convolutional neural networks (CNNs) can be created by forcing them to adjust their parameters during the training process to increase diversity in their decisions. As a new theoretical approach to reach this aim, we join the member neural architectures via a fully connected layer and insert a new correlation penalty term in the loss function to obstruct their similar operation. With this complementary term, we implement the standard guideline of ensemble creation to increase the members’ diversity for CNNs in a more detailed and flexible way than similar existing techniques. As for applicability, we show that our approach can be efficiently used in various classification tasks. More specifically, we demonstrate its performance in challenging medical image analysis and natural image classification problems. Besides the theoretical considerations and foundations, our experimental findings suggest that the proposed technique is competitive. Namely, on the one hand, the classification rate of the ensemble trained in this way outperformed all the individual accuracies of the state-of-the-art member CNNs according to the standard error functions of these application domains. On the other hand, it is also validated that the ensemble members get more diverse and their accuracies are raised by adding the penalization term. Moreover, we performed a full comparative analysis, including other state-of-the-art ensemble-based approaches recommended for the same classification tasks. This comparative study also confirmed the superiority of our method, as it overcame the current solutions.},
}

@inproceedings{MTMT:34216921,
  author        = {Bogacsovics, Gergő and Harangi, Balázs and Hajdu, András},
  title         = {Increasing the diversity of ensemble members for accurate brain tumor classification},
  booktitle     = {36th {IEEE} International Symposium on Computer-Based Medical Systems, {CBMS} 2023},
  pages         = {529--534},
  year          = {2023},
  doi           = {10.1109/CBMS58004.2023.00274},
  url           = {https://m2.mtmt.hu/api/publication/34216921},
  unique-id     = {34216921},
  orcid-numbers = {Harangi, Balázs/0000-0003-4405-2040},
}

% NOTE(review): pages holds the article number taken from the trailing digits
% of the DOI, mirroring the other record from the same journal above; confirm
% against the publisher page.
@article{MTMT:34182540,
  author        = {Tóth, János and Tomán, Henrietta and Hajdu, Gabriella and Hajdu, András},
  title         = {Using Noisy Evaluation to Accelerate Parameter Optimization of Medical Image Segmentation Ensembles},
  journal       = {Mathematics},
  journal-iso   = {MATHEMATICS-BASEL},
  volume        = {11},
  pages         = {3992},
  year          = {2023},
  doi           = {10.3390/math11183992},
  url           = {https://m2.mtmt.hu/api/publication/34182540},
  eissn         = {2227-7390},
  unique-id     = {34182540},
  abstract      = {An important concern with regard to the ensembles of algorithms is that using the individually optimal parameter settings of the members does not necessarily maximize the performance of the ensemble itself. In this paper, we propose a novel evaluation method for simulated annealing that combines dataset sampling and image downscaling to accelerate the parameter optimization of medical image segmentation ensembles. The scaling levels and sample sizes required to maintain the convergence of the search are theoretically determined by adapting previous results for simulated annealing with imprecise energy measurements. To demonstrate the efficiency of the proposed method, we optimize the parameters of an ensemble for lung segmentation in CT scans. Our experimental results show that the proposed method can maintain the solution quality of the base method with significantly lower runtime. In our problem, optimization with simulated annealing yielded an F1 score of 0.9397 and an associated MCC of 0.7757. Our proposed method maintained the solution quality with an F1 score of 0.9395 and MCC of 0.7755 while exhibiting a 42.01\% reduction in runtime. It was also shown that the proposed method is more efficient than simulated annealing with only sampling-based evaluation when the dataset size is below a problem-specific threshold.},
}

% NOTE(review): issn and eissn carry the same value in the record below, as
% exported; confirm whether both are intended.
@article{MTMT:33731626,
  author        = {Liu, Ying and Zhou, Shujing and Wang, Longbin and Xu, Ming and Huang, Xufeng and Li, Zhengrui and Hajdu, András and Zhang, Ling},
  title         = {Machine learning approach combined with causal relationship inferring unlocks the shared pathomechanism between {COVID-19} and acute myocardial infarction},
  journal       = {Frontiers in Microbiology},
  journal-iso   = {FRONT MICROBIOL},
  volume        = {14},
  year          = {2023},
  doi           = {10.3389/fmicb.2023.1153106},
  url           = {https://m2.mtmt.hu/api/publication/33731626},
  issn          = {1664-302X},
  eissn         = {1664-302X},
  unique-id     = {33731626},
}