% Publication list. Each entry is keyed by its MTMT record id (MTMT:<id>);
% the same id appears in the unique-id field and at the end of the url field.
% unique-id, orcid-numbers, journal-iso and eissn are not standard BibTeX
% fields; they are retained as metadata.
% File encoding is UTF-8 (accented names are stored as literal characters).

@inproceedings{MTMT:34785341,
  author        = {Mengke, Dalai and Meng, Yan and Mihajlik, Péter},
  title         = {Model-centric data selection: Refining end-to-end speech recognition},
  booktitle     = {2nd Workshop on Intelligent Infocommunication Networks, Systems and Services},
  year          = {2024},
  pages         = {1--5},
  doi           = {10.3311/WINS2024-001},
  url           = {https://m2.mtmt.hu/api/publication/34785341},
  unique-id     = {34785341},
  orcid-numbers = {Meng, Yan/0000-0002-2764-0716; Mihajlik, Péter/0000-0001-7532-9773},
}

@inproceedings{MTMT:34402621,
  author        = {Mihajlik, Péter and Kádár, Máté Soma and Dobsinszki, Gergely and Meng, Yan and Mengke, Dalai and Linke, Julian and Fegyó, Tibor and Mády, Katalin},
  title         = {What Kind of Multi- or Cross-lingual Pre-training is the most Effective for a Spontaneous, Less-resourced {ASR} Task?},
  booktitle     = {2nd Annual Meeting of the ELRA/ISCA SIG on Under-resourced Languages (SIGUL 2023)},
  year          = {2023},
  pages         = {58--62},
  doi           = {10.21437/SIGUL.2023-13},
  url           = {https://m2.mtmt.hu/api/publication/34402621},
  unique-id     = {34402621},
  orcid-numbers = {Mihajlik, Péter/0000-0001-7532-9773; Meng, Yan/0000-0002-2764-0716; Fegyó, Tibor/0000-0003-0938-1965},
}

@inproceedings{MTMT:34168004,
  author        = {Linke, J. and Kádár, M. S. and Dobsinszki, G. and Mihajlik, Péter and Kubin, G. and Schuppler, B.},
  title         = {What do self-supervised speech representations encode? {An} analysis of languages, varieties, speaking styles and speakers},
  booktitle     = {Proceedings of the 24th International Speech Communication Association, Interspeech 2023},
  year          = {2023},
  pages         = {5371--5375},
  doi           = {10.21437/Interspeech.2023-951},
  url           = {https://m2.mtmt.hu/api/publication/34168004},
  unique-id     = {34168004},
  orcid-numbers = {Mihajlik, Péter/0000-0001-7532-9773},
}

@inproceedings{MTMT:33655768,
  author        = {Molnár, Cecília Sarolta and Mády, Katalin and Mihajlik, Péter and Gyuris, Beáta},
  title         = {The {Akaka} Maptask Corpus},
  booktitle     = {Speech Research conference},
  year          = {2023},
  pages         = {81--83},
  url           = {https://m2.mtmt.hu/api/publication/33655768},
  unique-id     = {33655768},
  orcid-numbers = {Mihajlik, Péter/0000-0001-7532-9773; Gyuris, Beáta/0000-0001-8804-7409},
}

@inproceedings{MTMT:33655748,
  author        = {Mády, Katalin and Kohári, Anna and Reichel, Uwe D. and Szalontai, Ádám and Mihajlik, Péter},
  title         = {The {Budapest} Games Corpus},
  booktitle     = {Speech Research conference},
  year          = {2023},
  pages         = {75--77},
  url           = {https://m2.mtmt.hu/api/publication/33655748},
  unique-id     = {33655748},
  orcid-numbers = {Kohári, Anna/0000-0003-2500-0149; Mihajlik, Péter/0000-0001-7532-9773},
}

@inproceedings{MTMT:33593235,
  author        = {Kádár, Máté Soma and Dobsinszki, Gergely and Mády, Katalin and Mihajlik, Péter},
  title         = {„Feeding the {BEAST}” – A {BEA Speech Transcriber} továbbfejlesztése és integrálása neurális nyelvmodellel},
  booktitle     = {XIX. Magyar Számítógépes Nyelvészeti Konferencia, MSZNY-2023},
  year          = {2023},
  pages         = {135--145},
  url           = {https://m2.mtmt.hu/api/publication/33593235},
  unique-id     = {33593235},
  orcid-numbers = {Mihajlik, Péter/0000-0001-7532-9773},
}

@inproceedings{MTMT:33437265,
  author        = {Mihajlik, Péter and Balog, András and Gráczi, Tekla Etelka and Kohári, Anna and Tarján, Balázs and Mády, Katalin},
  title         = {{BEA-Base}: A Benchmark for {ASR} of Spontaneous {Hungarian}},
  booktitle     = {LREC 2022, Thirteenth International Conference on Language Resources and Evaluation},
  year          = {2022},
  pages         = {1970--1977},
  url           = {https://m2.mtmt.hu/api/publication/33437265},
  unique-id     = {33437265},
  orcid-numbers = {Mihajlik, Péter/0000-0001-7532-9773; Gráczi, Tekla Etelka/0000-0003-3351-9661; Kohári, Anna/0000-0003-2500-0149; Tarján, Balázs/0000-0002-9676-3082},
}

@incollection{MTMT:33283256,
  author        = {Mihajlik, Péter and Gráczi, Tekla Etelka and Kohári, Anna and Tarján, Balázs and Balog, András and Mády, Katalin},
  title         = {A {BEA} továbbfejlesztése és alkalmazása kontrasztív gépi beszédfelismerési kísérletekre},
  booktitle     = {Általános nyelvészeti tanulmányok 34.},
  year          = {2022},
  pages         = {361--380},
  url           = {https://m2.mtmt.hu/api/publication/33283256},
  unique-id     = {33283256},
  orcid-numbers = {Mihajlik, Péter/0000-0001-7532-9773; Gráczi, Tekla Etelka/0000-0003-3351-9661; Kohári, Anna/0000-0003-2500-0149; Tarján, Balázs/0000-0002-9676-3082},
}

@article{MTMT:33267111,
  author        = {Tarján, Balázs and Fegyó, Tibor and Mihajlik, Péter},
  title         = {Morphology aware data augmentation with neural language models for online hybrid {ASR}},
  journal       = {Acta Linguistica Academica},
  journal-iso   = {ACTA LING ACAD},
  volume        = {69},
  year          = {2022},
  pages         = {581--598},
  doi           = {10.1556/2062.2022.00582},
  issn          = {2559-8201},
  eissn         = {2560-1016},
  url           = {https://m2.mtmt.hu/api/publication/33267111},
  unique-id     = {33267111},
  orcid-numbers = {Tarján, Balázs/0000-0002-9676-3082; Fegyó, Tibor/0000-0003-0938-1965; Mihajlik, Péter/0000-0001-7532-9773},
  abstract      = {Recognition of Hungarian conversational telephone speech is challenging due to the informal style and morphological richness of the language. Neural Network Language Models (NNLMs) can provide remedy for the high perplexity of the task; however, their high complexity makes them very difficult to apply in the first (single) pass of an online system. Recent studies showed that a considerable part of the knowledge of NNLMs can be transferred to traditional n-grams by using neural text generation based data augmentation. Data augmentation with NNLMs works well for isolating languages; however, we show that it causes a vocabulary explosion in a morphologically rich language. Therefore, we propose a new, morphology aware neural text augmentation method, where we retokenize the generated text into statistically derived subwords. We compare the performance of word-based and subword-based data augmentation techniques with recurrent and Transformer language models and show that subword-based methods can significantly improve the Word Error Rate (WER) while greatly reducing vocabulary size and memory requirements. Combining subword-based modeling and neural language model-based data augmentation, we were able to achieve 11\% relative WER reduction and preserve real-time operation of our conversational telephone speech recognition system. Finally, we also demonstrate that subword-based neural text augmentation outperforms the word-based approach not only in terms of overall WER but also in recognition of Out-of-Vocabulary (OOV) words.},
}

@inproceedings{MTMT:32728824,
  author        = {Nagy, Soma Bálint and Herdinai, Viktor and Pálfi, Gellért and Fegyó, Tibor and Mihajlik, Péter and Farkas, Richárd},
  title         = {Magyar nyelvű időpont-egyeztető dialógusrendszer v2},
  booktitle     = {XVIII. Magyar Számítógépes Nyelvészeti Konferencia : MSZNY 2022},
  year          = {2022},
  pages         = {633--644},
  url           = {https://m2.mtmt.hu/api/publication/32728824},
  unique-id     = {32728824},
  orcid-numbers = {Fegyó, Tibor/0000-0003-0938-1965; Mihajlik, Péter/0000-0001-7532-9773},
}