% Bibliography database; entries are keyed by the record id that also appears
% in each entry's unique-id and url fields.
% The fields unique-id, orcid-numbers, journal-iso and eissn are not part of the
% classic BibTeX data model; standard styles are expected to ignore them.
% The file contains raw UTF-8 text, so a Unicode-aware backend (e.g. Biber) is
% presumably required - confirm against the build setup.

@inproceedings{MTMT:34067268,
  author        = {Tóth, László and Honarmandi Shandiz, Amin and Gosztolya, Gábor and Csapó, Tamás Gábor},
  title         = {Adaptation of Tongue Ultrasound-Based Silent Speech Interfaces Using Spatial Transformer Networks},
  booktitle     = {Proceedings of the 24th International Speech Communication Association, Interspeech 2023},
  pages         = {1169--1173},
  year          = {2023},
  doi           = {10.21437/Interspeech.2023-1607},
  url           = {https://m2.mtmt.hu/api/publication/34067268},
  unique-id     = {34067268},
  orcid-numbers = {Tóth, László/0000-0003-0161-1375; Gosztolya, Gábor/0000-0002-2864-6466; Csapó, Tamás Gábor/0000-0003-4375-7524},
  abstract      = {Thanks to the latest deep learning algorithms, silent speech interfaces (SSI) are now able to synthesize intelligible speech from articulatory movement data under certain conditions. However, the resulting models are rather speaker-specific, making a quick switch between users troublesome. Even for the same speaker, these models perform poorly cross-session, i.e. after dismounting and re-mounting the recording equipment. To aid quick speaker and session adaptation of ultrasound tongue imaging-based SSI models, we extend our deep networks with a spatial transformer network (STN) module, capable of performing an affine transformation on the input images. Although the STN part takes up only about 10\% of the network, our experiments show that adapting just the STN module might allow to reduce MSE by 88\% on the average, compared to retraining the whole network. The improvement is even larger (around 92\%) when adapting the network to different recording sessions from the same speaker. © 2023 International Speech Communication Association. All rights reserved.},
}

@inproceedings{MTMT:33576741,
  author        = {Bóna, Judit and Gosztolya, Gábor and Hoffmann, Ildikó and Klivényi, Péter and Tóth, Alinka and Svindt, Veronika and Tóth, László and Lőrincz, András},
  title         = {Temporal variables of speech in {Parkinson’s} Disease in three spontaneous speaking tasks},
  booktitle     = {Book of Abstracts : The 11th scientific conference with international participation Speech Research, Faculty of Humanities and Social Sciences, Zagreb, Croatia, December 8 - 10 2022},
  pages         = {28--29},
  year          = {2022},
  url           = {https://m2.mtmt.hu/api/publication/33576741},
  unique-id     = {33576741},
  orcid-numbers = {Bóna, Judit/0000-0003-2369-1636; Gosztolya, Gábor/0000-0002-2864-6466; Klivényi, Péter/0000-0002-5389-3266; Svindt, Veronika/0000-0002-6027-9029; Tóth, László/0000-0003-0161-1375; Lőrincz, András/0000-0002-1280-3447},
}

@inproceedings{MTMT:33563542,
  author        = {Pap, Gergely and Ádám, Krisztián and Györgypál, Zoltán and Tóth, László and Hegedűs, Zoltán},
  title         = {Depthwise Convolutions using Physicochemical Features of {DNA} for Transcription Factor Binding Site Classification. Physicochemical Features for {DNA}-Protein Classification with Depthwise Convolutions},
  booktitle     = {ICAAI '22: Proceedings of the 6th International Conference on Advances in Artificial Intelligence},
  pages         = {15--21},
  year          = {2022},
  doi           = {10.1145/3571560.3571563},
  url           = {https://m2.mtmt.hu/api/publication/33563542},
  unique-id     = {33563542},
  orcid-numbers = {Tóth, László/0000-0003-0161-1375},
}

@article{MTMT:33220025,
  author        = {Csapó, Tamás Gábor and Gosztolya, Gábor and Tóth, László and Honarmandi Shandiz, Amin and Markó, Alexandra},
  title         = {Optimizing the Ultrasound Tongue Image Representation for Residual Network-Based Articulatory-to-Acoustic Mapping},
  journal       = {Sensors},
  journal-iso   = {SENSORS-BASEL},
  volume        = {22},
  year          = {2022},
  doi           = {10.3390/s22228601},
  url           = {https://m2.mtmt.hu/api/publication/33220025},
  eissn         = {1424-8220},
  unique-id     = {33220025},
  orcid-numbers = {Csapó, Tamás Gábor/0000-0003-4375-7524; Gosztolya, Gábor/0000-0002-2864-6466; Tóth, László/0000-0003-0161-1375; Markó, Alexandra/0000-0003-0301-7134},
  abstract      = {Within speech processing, articulatory-to-acoustic mapping (AAM) methods can apply ultrasound tongue imaging (UTI) as an input. (Micro)convex transducers are mostly used, which provide a wedge-shape visual image. However, this process is optimized for the visual inspection of the human eye, and the signal is often post-processed by the equipment. With newer ultrasound equipment, now it is possible to gain access to the raw scanline data (i.e., ultrasound echo return) without any internal post-processing. In this study, we compared the raw scanline representation with the wedge-shaped processed UTI as the input for the residual network applied for AAM, and we also investigated the optimal size of the input image. We found no significant differences between the performance attained using the raw data and the wedge-shaped image extrapolated from it. We found the optimal pixel size to be 64 × 43 in the case of the raw scanline input, and 64 × 64 when transformed to a wedge. Therefore, it is not necessary to use the full original 64 × 842 pixels raw scanline, but a smaller image is enough. This allows for the building of smaller networks, and will be beneficial for the development of session and speaker-independent methods for practical applications. AAM systems have the target application of a “silent speech interface”, which could be helpful for the communication of the speaking-impaired, in military applications, or in extremely noisy conditions.},
}

@inproceedings{MTMT:33096595,
  author        = {Honarmandi Shandiz, Amin and Tóth, László},
  title         = {Improved Processing of Ultrasound Tongue Videos by Combining {ConvLSTM} and {3D} Convolutional Networks},
  booktitle     = {Advances and Trends in Artificial Intelligence. Theory and Practices in Artificial Intelligence},
  pages         = {265--274},
  year          = {2022},
  doi           = {10.1007/978-3-031-08530-7_22},
  url           = {https://m2.mtmt.hu/api/publication/33096595},
  unique-id     = {33096595},
  orcid-numbers = {Tóth, László/0000-0003-0161-1375},
}

@article{MTMT:32841964,
  author        = {Kálmán, János and Devanand, Davangere P. and Gosztolya, Gábor and Balogh, Réka and Imre, Nóra and Tóth, László and Hoffmann, Ildikó and Kovács, Ildikó and Vincze, Veronika and Pákáski, Magdolna},
  title         = {Temporal speech parameters detect mild cognitive impairment in different languages: validation and comparison of the {Speech-GAP Test®} in {English} and {Hungarian}},
  journal       = {Current Alzheimer Research},
  journal-iso   = {CURR ALZHEIMER RES},
  volume        = {19},
  pages         = {373--386},
  year          = {2022},
  doi           = {10.2174/1567205019666220418155130},
  url           = {https://m2.mtmt.hu/api/publication/32841964},
  issn          = {1567-2050},
  eissn         = {1875-5828},
  unique-id     = {32841964},
  orcid-numbers = {Kálmán, János/0000-0001-5319-5639; Gosztolya, Gábor/0000-0002-2864-6466; Tóth, László/0000-0003-0161-1375; Kovács, Ildikó/0000-0003-4215-8351; Vincze, Veronika/0000-0002-9844-2194; Pákáski, Magdolna/0000-0001-8067-5435},
}

@inproceedings{MTMT:32800392,
  author        = {Gosztolya, Gábor and Tóth, László and Svindt, Veronika and Bóna, Judit and Hoffmann, Ildikó},
  title         = {Using Acoustic Deep Neural Network Embeddings to Detect Multiple Sclerosis From Speech},
  booktitle     = {ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  pages         = {6927--6931},
  year          = {2022},
  doi           = {10.1109/ICASSP43922.2022.9746856},
  url           = {https://m2.mtmt.hu/api/publication/32800392},
  unique-id     = {32800392},
  orcid-numbers = {Gosztolya, Gábor/0000-0002-2864-6466; Tóth, László/0000-0003-0161-1375; Svindt, Veronika/0000-0002-6027-9029; Bóna, Judit/0000-0003-2369-1636},
}

@inproceedings{MTMT:32800358,
  author        = {Kiss-Vetráb, Mercedes and Egas López, José Vicente and Balogh, Réka and Imre, Nóra and Hoffmann, Ildikó and Tóth, László and Pákáski, Magdolna and Kálmán, János and Gosztolya, Gábor},
  title         = {Using Spectral Sequence-to-Sequence Autoencoders to Assess Mild Cognitive Impairment},
  booktitle     = {ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  pages         = {6467--6471},
  year          = {2022},
  doi           = {10.1109/ICASSP43922.2022.9746148},
  url           = {https://m2.mtmt.hu/api/publication/32800358},
  unique-id     = {32800358},
  orcid-numbers = {Kiss-Vetráb, Mercedes/0000-0002-3914-2036; Tóth, László/0000-0003-0161-1375; Pákáski, Magdolna/0000-0001-8067-5435; Kálmán, János/0000-0001-5319-5639; Gosztolya, Gábor/0000-0002-2864-6466},
}

@article{MTMT:32761562,
  author        = {Egas López, José Vicente and Balogh, Réka and Imre, Nóra and Hoffmann, Ildikó and Szabó, Martina Katalin and Tóth, László and Pákáski, Magdolna and Kálmán, János and Gosztolya, Gábor},
  title         = {Automatic screening of mild cognitive impairment and {Alzheimer’s} disease by means of posterior-thresholding hesitation representation},
  journal       = {Computer Speech and Language},
  journal-iso   = {COMPUT SPEECH LANG},
  volume        = {75},
  year          = {2022},
  doi           = {10.1016/j.csl.2022.101377},
  url           = {https://m2.mtmt.hu/api/publication/32761562},
  issn          = {0885-2308},
  eissn         = {1095-8363},
  unique-id     = {32761562},
  orcid-numbers = {Szabó, Martina Katalin/0000-0002-4192-4352; Tóth, László/0000-0003-0161-1375; Pákáski, Magdolna/0000-0001-8067-5435; Kálmán, János/0000-0001-5319-5639; Gosztolya, Gábor/0000-0002-2864-6466},
}

@article{MTMT:32749289,
  author        = {Imre, Nóra and Balogh, Réka and Gosztolya, Gábor and Tóth, László and Hoffmann, Ildikó and Várkonyi, Tamás and Lengyel, Csaba Attila and Pákáski, Magdolna and Kálmán, János},
  title         = {Temporal Speech Parameters Indicate Early Cognitive Decline in Elderly Patients With Type 2 Diabetes Mellitus},
  journal       = {Alzheimer Disease \& Associated Disorders},
  journal-iso   = {ALZ DIS ASSOC DIS},
  volume        = {36},
  pages         = {148--155},
  year          = {2022},
  doi           = {10.1097/WAD.0000000000000492},
  url           = {https://m2.mtmt.hu/api/publication/32749289},
  issn          = {0893-0341},
  eissn         = {1546-4156},
  unique-id     = {32749289},
  orcid-numbers = {Gosztolya, Gábor/0000-0002-2864-6466; Tóth, László/0000-0003-0161-1375; Várkonyi, Tamás/0000-0001-6833-3563; Lengyel, Csaba Attila/0000-0002-0434-0067; Pákáski, Magdolna/0000-0001-8067-5435; Kálmán, János/0000-0001-5319-5639},
  abstract      = {The earliest signs of cognitive decline include deficits in temporal (time-based) speech characteristics. Type 2 diabetes mellitus (T2DM) patients are more prone to mild cognitive impairment (MCI). The aim of this study was to compare the temporal speech characteristics of elderly (above 50 y) T2DM patients with age-matched nondiabetic subjects. A total of 160 individuals were screened, 100 of whom were eligible (T2DM: n=51; nondiabetic: n=49). Participants were classified either as having healthy cognition (HC) or showing signs of MCI. Speech recordings were collected through a phone call. Based on automatic speech recognition, 15 temporal parameters were calculated. The HC with T2DM group showed significantly shorter utterance length, higher duration rate of silent pause and total pause, and higher average duration of silent pause and total pause compared with the HC without T2DM group. Regarding the MCI participants, parameters were similar between the T2DM and the nondiabetic subgroups. Temporal speech characteristics of T2DM patients showed early signs of altered cognitive functioning, whereas neuropsychological tests did not detect deterioration. This method is useful for identifying the T2DM patients most at risk for manifest MCI, and could serve as a remote cognitive screening tool.},
}