@comment{
  Publication records exported from MTMT (m2.mtmt.hu). Each citation key is
  "MTMT:" followed by the record id, which is repeated in the unique-id field.
  The file contains non-ASCII characters directly, so process it with a
  UTF-8-aware tool (e.g. Biber). journal-iso, unique-id and eissn are not
  standard BibTeX fields; standard styles ignore them.

  NOTE(review): in the four ParlaMint entries several author names arrived
  truncated or stripped of diacritics in the export (e.g. "GarcLouzao",
  "GarcD", "Cagr", "Magnusson, M"). They were restored by hand to what is
  believed to be the published spelling -- please confirm each one against the
  repository record of the corresponding ParlaMint release.
}

@inproceedings{MTMT:34557892,
  title       = {{ParancsPULI}: Az utasításkövető {PULI}-modell},
  url         = {https://m2.mtmt.hu/api/publication/34557892},
  author      = {Yang, Zijian Győző and Dodé, Réka and Héja, Enikő
                 and Laki, László János and Ligeti-Nagy, Noémi
                 and Madarász, Gábor and Váradi, Tamás},
  booktitle   = {XX. Magyar Számítógépes Nyelvészeti Konferencia},
  unique-id   = {34557892},
  year        = {2024},
  pages       = {61--72},
}

@article{MTMT:34548692,
  title       = {{ProkBERT} family: genomic language models for microbiome applications},
  url         = {https://m2.mtmt.hu/api/publication/34548692},
  author      = {Ligeti, Balázs and Szepesi-Nagy, István and Bodnár, Babett
                 and Ligeti-Nagy, Noémi and Juhász, János},
  doi         = {10.3389/fmicb.2023.1331233},
  journal-iso = {FRONT MICROBIOL},
  journal     = {Frontiers in Microbiology},
  volume      = {14},
  unique-id   = {34548692},
  issn        = {1664-302X},
  year        = {2024},
  eissn       = {1664-302X},
}

@inproceedings{MTMT:34547046,
  title       = {Magyar nyelvű utasításkövető korpusz építése {Stanford} {Alpaca} promptok fordításával és lokalizálásával},
  url         = {https://m2.mtmt.hu/api/publication/34547046},
  author      = {Yang, Zijian Győző and Szlávik, Szilárd and Ligeti-Nagy, Noémi},
  booktitle   = {XX. Magyar Számítógépes Nyelvészeti Konferencia},
  unique-id   = {34547046},
  year        = {2024},
  pages       = {243--255},
}

@misc{MTMT:34831432,
  title       = {Multilingual comparable corpora of parliamentary debates {ParlaMint} 3.0},
  url         = {https://m2.mtmt.hu/api/publication/34831432},
  author      = {Erjavec, Tomaž and Kopp, Matyáš and Ogrodniczuk, Maciej
                 and Osenova, Petya and Fišer, Darja and Pirker, Hannes
                 and Wissik, Tanja and Schopper, Daniel and Kirnbauer, Martin
                 and Ljubešić, Nikola and Rupnik, Peter and Mochtak, Michal
                 and van der Pol, Henk and Depoorter, Griet and Simov, Kiril
                 and Grigorova, Vladislava and Grigorov, Ilko and Jongejan, Bart
                 and Haltrup Hansen, Dorte and Navarretta, Costanza and Mölder, Martin
                 and Kahusk, Neeme and Vider, Kadri and Bel, Nuria
                 and Antiba-Cartazo, Iván and Pisani, Marilina and Zevallos, Rodolfo
                 and Vladu, Adina Ioana and Magariños, Carmen and Bardanca, Daniel
                 and Barcala, Mario and Garcia, Marcos and Pérez Lago, María
                 and García Louzao, Pedro and Vivel Couso, Ainhoa and Vázquez Abuín, Marta
                 and García Díaz, Noelia and Vidal Miguéns, Adrián and Fernández Rei, Elisa
                 and Regueira, Xosé Luís and Diwersy, Sascha and Luxardo, Giancarlo
                 and Coole, Matthew and Rayson, Paul and Nwadukwe, Amanda
                 and Gkoumas, Dimitris and Papavassiliou, Vassilis and Prokopidis, Prokopis
                 and Gavriilidou, Maria and Piperidis, Stelios and Ligeti-Nagy, Noémi
                 and Jelencsik-Mátyus, Kinga and Varga, Zsófia and Dodé, Réka
                 and Barkarson, Starkaður and Agnoloni, Tommaso and Bartolini, Roberto
                 and Frontini, Francesca and Montemagni, Simonetta and Quochi, Valeria
                 and Venturi, Giulia and Ruisi, Manuela and Marchetti, Carlo
                 and Battistoni, Roberto and Darģis, Roberts and van Heusden, Ruben
                 and Marx, Maarten and Tungland, Lars Magne and Rudolf, Michał
                 and Nitoń, Bartłomiej and Aires, José and Mendes, Amália
                 and Cardoso, Aida and Pereira, Rui and Yrjänäinen, Väinö
                 and Norén, Fredrik Mohammadi and Magnusson, Måns and Jarlbrink, Johan
                 and Meden, Katja and Pančur, Andrej and Ojsteršek, Mihael
                 and Çöltekin, Çağrı and Kryvenko, Anna},
  unique-id   = {34831432},
  year        = {2023},
}

@misc{MTMT:34723005,
  title       = {Linguistically annotated multilingual comparable corpora of parliamentary debates {ParlaMint.ana} 4.0},
  url         = {https://m2.mtmt.hu/api/publication/34723005},
  author      = {Erjavec, Tomaž and Kopp, Matyáš and Ogrodniczuk, Maciej
                 and Osenova, Petya and Agerri, Rodrigo and Agirrezabal, Manex
                 and Agnoloni, Tommaso and Aires, José and Albini, Monica
                 and Alkorta, Jon and Antiba-Cartazo, Iván and Arrieta, Ekain
                 and Barcala, Mario and Bardanca, Daniel and Barkarson, Starkaður
                 and Bartolini, Roberto and Battistoni, Roberto and Bel, Nuria
                 and Bonet Ramos, Maria del Mar and Calzada Pérez, María and Cardoso, Aida
                 and Çöltekin, Çağrı and Coole, Matthew and Darģis, Roberts
                 and de Does, Jesse and de Libano, Ruben and Depoorter, Griet
                 and Depuydt, Katrien and Diwersy, Sascha and Dodé, Réka
                 and Fernandez, Kike and Fernández Rei, Elisa and Frontini, Francesca
                 and Garcia, Marcos and García Díaz, Noelia and García Louzao, Pedro
                 and Gavriilidou, Maria and Gkoumas, Dimitris and Grigorov, Ilko
                 and Grigorova, Vladislava and Haltrup Hansen, Dorte and Iruskieta, Mikel
                 and Jarlbrink, Johan and Jelencsik-Mátyus, Kinga and Jongejan, Bart
                 and Kahusk, Neeme and Kirnbauer, Martin and Kryvenko, Anna
                 and Ligeti-Nagy, Noémi and Ljubešić, Nikola and Luxardo, Giancarlo
                 and Magariños, Carmen and Magnusson, Måns and Marchetti, Carlo
                 and Marx, Maarten and Meden, Katja and Mendes, Amália
                 and Mochtak, Michal and Mölder, Martin and Montemagni, Simonetta
                 and Navarretta, Costanza and Nitoń, Bartłomiej and Norén, Fredrik Mohammadi
                 and Nwadukwe, Amanda and Ojsteršek, Mihael and Pančur, Andrej
                 and Papavassiliou, Vassilis and Pereira, Rui and Pérez Lago, María
                 and Piperidis, Stelios and Pirker, Hannes and Pisani, Marilina
                 and van der Pol, Henk and Prokopidis, Prokopis and Quochi, Valeria
                 and Rayson, Paul and Regueira, Xosé Luís and Rudolf, Michał
                 and Ruisi, Manuela and Rupnik, Peter and Schopper, Daniel
                 and Simov, Kiril and Sinikallio, Laura and Skubic, Jure
                 and Tamper, Minna and Tungland, Lars Magne and Tuominen, Jouni
                 and van Heusden, Ruben and Varga, Zsófia and Vázquez Abuín, Marta
                 and Venturi, Giulia and Vidal Miguéns, Adrián and Vider, Kadri
                 and Vivel Couso, Ainhoa and Vladu, Adina Ioana and Wissik, Tanja
                 and Yrjänäinen, Väinö and Zevallos, Rodolfo and Fišer, Darja},
  unique-id   = {34723005},
  year        = {2023},
}

@misc{MTMT:34722993,
  title       = {Multilingual comparable corpora of parliamentary debates {ParlaMint} 4.0},
  url         = {https://m2.mtmt.hu/api/publication/34722993},
  author      = {Erjavec, Tomaž and Kopp, Matyáš and Ogrodniczuk, Maciej
                 and Osenova, Petya and Agirrezabal, Manex and Agnoloni, Tommaso
                 and Aires, José and Albini, Monica and Alkorta, Jon
                 and Antiba-Cartazo, Iván and Arrieta, Ekain and Barcala, Mario
                 and Bardanca, Daniel and Barkarson, Starkaður and Bartolini, Roberto
                 and Battistoni, Roberto and Bel, Nuria and Bonet Ramos, Maria del Mar
                 and Calzada Pérez, María and Cardoso, Aida and Çöltekin, Çağrı
                 and Coole, Matthew and Darģis, Roberts and de Libano, Ruben
                 and Depoorter, Griet and Diwersy, Sascha and Dodé, Réka
                 and Fernandez, Kike and Fernández Rei, Elisa and Frontini, Francesca
                 and Garcia, Marcos and García Díaz, Noelia and García Louzao, Pedro
                 and Gavriilidou, Maria and Gkoumas, Dimitris and Grigorov, Ilko
                 and Grigorova, Vladislava and Haltrup Hansen, Dorte and Iruskieta, Mikel
                 and Jarlbrink, Johan and Jelencsik-Mátyus, Kinga and Jongejan, Bart
                 and Kahusk, Neeme and Kirnbauer, Martin and Kryvenko, Anna
                 and Ligeti-Nagy, Noémi and Ljubešić, Nikola and Luxardo, Giancarlo
                 and Magariños, Carmen and Magnusson, Måns and Marchetti, Carlo
                 and Marx, Maarten and Meden, Katja and Mendes, Amália
                 and Mochtak, Michal and Mölder, Martin and Montemagni, Simonetta
                 and Navarretta, Costanza and Nitoń, Bartłomiej and Norén, Fredrik Mohammadi
                 and Nwadukwe, Amanda and Ojsteršek, Mihael and Pančur, Andrej
                 and Papavassiliou, Vassilis and Pereira, Rui and Pérez Lago, María
                 and Piperidis, Stelios and Pirker, Hannes and Pisani, Marilina
                 and van der Pol, Henk and Prokopidis, Prokopis and Quochi, Valeria
                 and Rayson, Paul and Regueira, Xosé Luís and Rudolf, Michał
                 and Ruisi, Manuela and Rupnik, Peter and Schopper, Daniel
                 and Simov, Kiril and Sinikallio, Laura and Skubic, Jure
                 and Tungland, Lars Magne and Tuominen, Jouni and van Heusden, Ruben
                 and Varga, Zsófia and Vázquez Abuín, Marta and Venturi, Giulia
                 and Vidal Miguéns, Adrián and Vider, Kadri and Vivel Couso, Ainhoa
                 and Vladu, Adina Ioana and Wissik, Tanja and Yrjänäinen, Väinö
                 and Zevallos, Rodolfo and Fišer, Darja},
  unique-id   = {34722993},
  year        = {2023},
}

@misc{MTMT:34722976,
  title       = {Linguistically annotated multilingual comparable corpora of parliamentary debates in {English} {ParlaMint-en.ana} 3.0},
  url         = {https://m2.mtmt.hu/api/publication/34722976},
  author      = {Kuzman, Taja and Ljubešić, Nikola and Erjavec, Tomaž
                 and Kopp, Matyáš and Ogrodniczuk, Maciej and Osenova, Petya
                 and Fišer, Darja and Pirker, Hannes and Wissik, Tanja
                 and Schopper, Daniel and Kirnbauer, Martin and Mochtak, Michal
                 and Rupnik, Peter and van der Pol, Henk and Depoorter, Griet
                 and de Does, Jesse and Simov, Kiril and Grigorova, Vladislava
                 and Grigorov, Ilko and Jongejan, Bart and Haltrup Hansen, Dorte
                 and Navarretta, Costanza and Mölder, Martin and Kahusk, Neeme
                 and Vider, Kadri and Bel, Nuria and Antiba-Cartazo, Iván
                 and Pisani, Marilina and Zevallos, Rodolfo and Regueira, Xosé Luís
                 and Vladu, Adina Ioana and Magariños, Carmen and Bardanca, Daniel
                 and Barcala, Mario and Garcia, Marcos and Pérez Lago, María
                 and García Louzao, Pedro and Vivel Couso, Ainhoa and Vázquez Abuín, Marta
                 and García Díaz, Noelia and Vidal Miguéns, Adrián and Fernández Rei, Elisa
                 and Diwersy, Sascha and Luxardo, Giancarlo and Coole, Matthew
                 and Rayson, Paul and Nwadukwe, Amanda and Gkoumas, Dimitris
                 and Papavassiliou, Vassilis and Prokopidis, Prokopis and Gavriilidou, Maria
                 and Piperidis, Stelios and Ligeti-Nagy, Noémi and Jelencsik-Mátyus, Kinga
                 and Varga, Zsófia and Dodé, Réka and Barkarson, Starkaður
                 and Agnoloni, Tommaso and Bartolini, Roberto and Frontini, Francesca
                 and Montemagni, Simonetta and Quochi, Valeria and Venturi, Giulia
                 and Ruisi, Manuela and Marchetti, Carlo and Battistoni, Roberto
                 and Darģis, Roberts and van Heusden, Ruben and Marx, Maarten
                 and Depuydt, Katrien and Tungland, Lars Magne and Rudolf, Michał
                 and Nitoń, Bartłomiej and Aires, José and Mendes, Amália
                 and Cardoso, Aida and Pereira, Rui and Yrjänäinen, Väinö
                 and Norén, Fredrik Mohammadi and Magnusson, Måns and Jarlbrink, Johan
                 and Meden, Katja and Pančur, Andrej and Ojsteršek, Mihael
                 and Çöltekin, Çağrı and Kryvenko, Anna},
  unique-id   = {34722976},
  year        = {2023},
}

@article{MTMT:34130962,
  title       = {Improve Performance of Fine-tuning Language Models with Prompting},
  url         = {https://m2.mtmt.hu/api/publication/34130962},
  author      = {Yang, Zijian Győző and Ligeti-Nagy, Noémi},
  doi         = {10.36244/ICJ.2023.5.10},
  journal-iso = {INFOCOMM J},
  journal     = {Infocommunications Journal},
  volume      = {15},
  unique-id   = {34130962},
  issn        = {2061-2079},
  abstract    = {This paper explores the effectiveness of prompt programming in
                 the fine-tuning process of a Hungarian language model. The study
                 builds on the prior success of prompt engineering in natural
                 language processing tasks and employs the prompting method to
                 enhance the fine-tuning performance of a huBERT model on several
                 benchmark datasets of HuLU. The experimentation involves testing
                 45 prompt combinations for the HuCoPA dataset and 15 prompt
                 variations for the HuRTE and HuWNLI datasets. The findings reveal
                 that the addition of an instructional text consistently produces
                 the best results across all winning cases, and that the [CLS]
                 token produces the best results in the separator token
                 experiments. The most significant enhancement was observed in the
                 HuWNLI dataset, with an increase in accuracy from 65\% to 85\%.
                 These results demonstrate that the addition of instruct text is
                 crucial and sufficient in enabling the language model to
                 effectively interpret and solve the Winograd Schemata problem.
                 These results showcase the potential of prompt programming in
                 enhancing the performance of language models in fine-tuning
                 tasks, and highlight the importance of incorporating
                 task-specific instructions to improve model interpretability and
                 accuracy.},
  year        = {2023},
  eissn       = {2061-2125},
  pages       = {62--68},
}

@inproceedings{MTMT:34070620,
  title       = {An Unsupervised Approach to Characterize the Adjectival Microstructure in a {Hungarian} Monolingual Explanatory Dictionary},
  url         = {https://m2.mtmt.hu/api/publication/34070620},
  author      = {Héja, Enikő and Ligeti-Nagy, Noémi and Simon, László and Lipp, Veronika},
  booktitle   = {Electronic lexicography in the 21st century (eLex 2023): Invisible Lexicography. Proceedings of the eLex 2023 conference. Brno, 27–29 June 2023},
  unique-id   = {34070620},
  year        = {2023},
  pages       = {150--167},
}

@article{MTMT:33809078,
  title       = {Building machine reading comprehension model from scratch},
  url         = {https://m2.mtmt.hu/api/publication/33809078},
  author      = {Yang, Zijian Győző and Ligeti-Nagy, Noémi},
  doi         = {10.33039/ami.2023.03.001},
  journal-iso = {ANN MATH INFORM},
  journal     = {Annales Mathematicae et Informaticae},
  volume      = {57},
  unique-id   = {33809078},
  issn        = {1787-5021},
  year        = {2023},
  eissn       = {1787-6117},
  pages       = {107--123},
}