% Bibliography on subword / large-vocabulary speech recognition.
% Citation keys have the form MTMT:<id>; the same id appears in each entry's
% unique-id and url fields. Non-standard fields (unique-id, journal-iso,
% eissn, orcid-numbers) are kept as-is; standard styles ignore them.
% Page ranges use the double hyphen (en dash); proper nouns and acronyms in
% titles are brace-protected so sentence-casing styles keep their capitals.

@misc{MTMT:32087221,
  title     = {The use of subwords for Automatic Speech Recognition},
  url       = {https://m2.mtmt.hu/api/publication/32087221},
  author    = {Mollberg, David Erik},
  unique-id = {32087221},
  year      = {2021}
}

@article{MTMT:31992793,
  title       = {Advances in subword-based {HMM-DNN} speech recognition across languages},
  url         = {https://m2.mtmt.hu/api/publication/31992793},
  author      = {Smit, Peter and Virpioja, Sami and Kurimo, Mikko},
  doi         = {10.1016/j.csl.2020.101158},
  journal-iso = {COMPUT SPEECH LANG},
  journal     = {COMPUTER SPEECH AND LANGUAGE},
  volume      = {66},
  unique-id   = {31992793},
  issn        = {0885-2308},
  year        = {2021},
  eissn       = {1095-8363}
}

@article{MTMT:31992800,
  title       = {Morphologically motivated word classes for very large vocabulary speech recognition of {Finnish} and {Estonian}},
  url         = {https://m2.mtmt.hu/api/publication/31992800},
  author      = {Varjokallio, Matti and Virpioja, Sami and Kurimo, Mikko},
  doi         = {10.1016/j.csl.2020.101141},
  journal-iso = {COMPUT SPEECH LANG},
  journal     = {COMPUTER SPEECH AND LANGUAGE},
  volume      = {66},
  unique-id   = {31992800},
  issn        = {0885-2308},
  year        = {2021},
  eissn       = {1095-8363}
}

@inproceedings{MTMT:27393011,
  title     = {New Baseline in Automatic Speech Recognition for {Northern Sámi}},
  url       = {https://m2.mtmt.hu/api/publication/27393011},
  author    = {Leinonen, Juho and Smit, Peter and Virpioja, Sami and Kurimo, Mikko},
  booktitle = {Proceedings of the Fourth International Workshop on Computational Linguistics of Uralic Languages},
  unique-id = {27393011},
  year      = {2018},
  pages     = {87--97}
}

@inproceedings{MTMT:31992792,
  title     = {First-Pass Techniques for Very Large Vocabulary Speech Recognition of Morphologically Rich Languages},
  url       = {https://m2.mtmt.hu/api/publication/31992792},
  author    = {Varjokallio, Matti and Virpioja, Sami and Kurimo, Mikko},
  booktitle = {IEEE SLT 2018},
  doi       = {10.1109/SLT.2018.8639691},
  unique-id = {31992792},
  year      = {2018},
  pages     = {227--234}
}

@inproceedings{MTMT:27392949,
  title     = {Improved subword modeling for {WFST}-based speech recognition},
  url       = {https://m2.mtmt.hu/api/publication/27392949},
  author    = {Smit, Peter and Virpioja, Sami and Kurimo, Mikko},
  booktitle = {Proceedings of Interspeech},
  unique-id = {27392949},
  year      = {2017},
  pages     = {2551--2555}
}

@inproceedings{MTMT:3146183,
  title         = {Summarization of Spontaneous Speech using Automatic Speech Recognition and a Speech Prosody based Tokenizer},
  url           = {https://m2.mtmt.hu/api/publication/3146183},
  author        = {Szaszák, György and Tündik, Máté Ákos and Beke, András},
  booktitle     = {Proceedings of the 8th International Joint Conference on Knowledge Discovery, Knowledge Engineering and Knowledge Management},
  doi           = {10.5220/0006044802210227},
  unique-id     = {3146183},
  year          = {2016},
  pages         = {221--227},
  orcid-numbers = {Tündik, Máté Ákos/0000-0003-2085-0136}
}

@inproceedings{MTMT:3000304,
  title         = {Magyar nyelvű, élő közéleti- és hírműsorok gépi feliratozása},
  url           = {https://m2.mtmt.hu/api/publication/3000304},
  author        = {Tarján, Balázs and Varga, Ádám and Tobler, Zoltán and Szaszák, György and Fegyó, Tibor and Bordás, Csaba and Mihajlik, Péter},
  booktitle     = {XII. Magyar Számítógépes Nyelvészeti Konferencia : MSZNY 2016},
  unique-id     = {3000304},
  year          = {2016},
  pages         = {89--99},
  orcid-numbers = {Tarján, Balázs/0000-0002-9676-3082; Fegyó, Tibor/0000-0003-0938-1965; Mihajlik, Péter/0000-0001-7532-9773}
}

@inproceedings{MTMT:2996839,
  title         = {Szöveg alapú nyelvi elemző kiértékelése gépi beszédfelismerő hibákkal terhelt kimenetén},
  url           = {https://m2.mtmt.hu/api/publication/2996839},
  author        = {Tündik, Máté Ákos and Szaszák, György},
  booktitle     = {XII. Magyar Számítógépes Nyelvészeti Konferencia : MSZNY 2016},
  unique-id     = {2996839},
  year          = {2016},
  pages         = {111--121},
  orcid-numbers = {Tündik, Máté Ákos/0000-0003-2085-0136}
}

% The percent signs in the abstract below are escaped: a bare percent sign
% would start a LaTeX comment if a style or backend emits the abstract.
@inproceedings{MTMT:27392952,
  title     = {Class n-gram models for very large vocabulary speech recognition of {Finnish} and {Estonian}},
  url       = {https://m2.mtmt.hu/api/publication/27392952},
  author    = {Varjokallio, Matti and Kurimo, Mikko and Virpioja, Sami},
  booktitle = {Statistical Language and Speech Processing},
  doi       = {10.1007/978-3-319-45925-7_11},
  unique-id = {27392952},
  abstract  = {We study class n-gram models for very large vocabulary speech recognition of Finnish and Estonian. The models are trained with vocabulary sizes of several millions of words using automatically derived classes. To evaluate the models on Finnish and an Estonian broadcast news speech recognition task, we modify Aalto University’s LVCSR decoder to operate with the class n-grams and very large vocabularies. Linear interpolation of a standard n-gram model and a class n-gram model provides relative perplexity improvements of 21.3 \% for Finnish and 12.8 \% for Estonian over the n-gram model. The relative improvements in word error rates are 5.5 \% for Finnish and 7.4 \% for Estonian. We also compare our word-based models to a state-of-the-art unlimited vocabulary recognizer utilizing subword n-gram models, and show that the very large vocabulary word-based models can perform equally well or better.},
  keywords  = {Speech recognition; Language modelling; Class n-gram models; Morphologically rich languages},
  year      = {2016},
  pages     = {133--144}
}

% NOTE(review): the DOI below has a book-chapter form and the journal field
% names a Lecture Notes series, so this record may be better typed as
% inproceedings with series/volume -- confirm the proceedings title first.
% Given names of Varga, Tobler and Bordás were expanded to match the author
% list of MTMT:3000304 (presumably the same people -- verify).
@article{MTMT:2995572,
  title         = {Automatic Close Captioning for Live {Hungarian} Television Broadcast Speech: A Fast and Resource-Efficient Approach},
  url           = {https://m2.mtmt.hu/api/publication/2995572},
  author        = {Varga, Ádám and Tarján, Balázs and Tobler, Zoltán and Szaszák, György and Fegyó, Tibor and Bordás, Csaba and Mihajlik, Péter},
  doi           = {10.1007/978-3-319-23132-7_13},
  journal-iso   = {LECT NOTES ARTIF INT},
  journal       = {LECTURE NOTES IN ARTIFICIAL INTELLIGENCE},
  volume        = {9319},
  unique-id     = {2995572},
  issn          = {0302-9743},
  abstract      = {In this paper, the application of LVCSR (Large Vocabulary Continuous Speech Recognition) technology is investigated for real-time, resource-limited broadcast close captioning. The work focuses on transcribing live broadcast conversation speech to make such programs accessible to deaf viewers. Due to computational limitations, real time factor (RTF) and memory requirements are kept low during decoding with various models tailored for Hungarian broadcast speech recognition. Two decoders are compared on the direct transcription task of broadcast conversation recordings, and setups employing re-speakers are also tested. Moreover, the models are evaluated on a broadcast news transcription task as well, and different language models (LMs) are tested in order to demonstrate the performance of our systems in settings when low memory consumption is a less crucial factor.},
  keywords      = {Speech recognition; Hungarian; GMM; Limited resources; Kaldi; DNN; Broadcast conversation; Broadcast news; LVCSR},
  year          = {2015},
  pages         = {105--112},
  orcid-numbers = {Tarján, Balázs/0000-0002-9676-3082; Fegyó, Tibor/0000-0003-0938-1965; Mihajlik, Péter/0000-0001-7532-9773}
}