<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article>
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" article-type="research-article" dtd-version="1.2" xml:lang="en"><front><journal-meta><journal-id journal-id-type="publisher-id">Macrosociolinguistics and Minority Languages</journal-id><journal-title-group><journal-title xml:lang="en">Macrosociolinguistics and Minority Languages</journal-title><trans-title-group xml:lang="ru"><trans-title>Macrosociolinguistics and Minority Languages</trans-title></trans-title-group></journal-title-group><issn publication-format="electronic">2949-5997</issn><publisher><publisher-name xml:lang="en">Peoples' Friendship University of Russia</publisher-name></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">50694</article-id><article-id pub-id-type="doi">10.22363/2949-5997-2025-3-2-131-145</article-id><article-id pub-id-type="edn">HHYPZI</article-id><article-categories><subj-group subj-group-type="toc-heading" xml:lang="en"><subject>The Languages of the Peoples of Russian Federation:  Digital Documentation Tools and Media Accessibility</subject></subj-group><subj-group subj-group-type="toc-heading" xml:lang="ru"><subject>Языки народов Российской Федерации: цифровые  инструменты документирования и медиадоступность</subject></subj-group><subj-group subj-group-type="article-type"><subject>Research Article</subject></subj-group></article-categories><title-group><article-title xml:lang="en">Production of audiobooks in the languages of the peoples of Russia using speech synthesizers: problems and prospects</article-title><trans-title-group xml:lang="ru"><trans-title>Производство аудиокниг на языках народов России с использованием синтезаторов речи: проблемы и перспективы</trans-title></trans-title-group></title-group><contrib-group><contrib contrib-type="author"><contrib-id 
contrib-id-type="orcid">https://orcid.org/0000-0002-5006-5975</contrib-id><contrib-id contrib-id-type="spin">5459-0852</contrib-id><name-alternatives><name xml:lang="en"><surname>Pozhidaev</surname><given-names>Mikhail S.</given-names></name><name xml:lang="ru"><surname>Пожидаев</surname><given-names>Михаил Сергеевич</given-names></name></name-alternatives><bio xml:lang="en"><p>Ph.D. in computer science, Associate Professor at the Department of Theoretical Foundations of Computer Science at the Institute of Applied Mathematics and Computer Science</p></bio><bio xml:lang="ru"><p>кандидат технических наук, доцент кафедры теоретических основ информатики института прикладной математики и компьютерных наук</p></bio><email>msp@luwrain.org</email><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author"><contrib-id contrib-id-type="orcid">https://orcid.org/0000-0002-1825-7379</contrib-id><contrib-id contrib-id-type="spin">7424-5366</contrib-id><name-alternatives><name xml:lang="en"><surname>Teplykh</surname><given-names>Elena S.</given-names></name><name xml:lang="ru"><surname>Теплых</surname><given-names>Елена Сергеевна</given-names></name></name-alternatives><bio xml:lang="en"><p>psychologist, a junior researcher at the Laboratory of Interdisciplinary Research</p></bio><bio xml:lang="ru"><p>психолог, младший научный сотрудник лаборатории междисциплинарных исследований</p></bio><email>elena@luwrain.org</email><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author"><contrib-id contrib-id-type="orcid">https://orcid.org/0009-0005-6954-6874</contrib-id><contrib-id contrib-id-type="spin">6954-5385</contrib-id><name-alternatives><name xml:lang="en"><surname>Danilov</surname><given-names>Sergey I.</given-names></name><name xml:lang="ru"><surname>Данилов</surname><given-names>Сергей Ильич</given-names></name></name-alternatives><bio xml:lang="en"><p>PhD student at the Department of General and Russian Linguistics, Faculty of 
Philology</p></bio><bio xml:lang="ru"><p>аспирант кафедры общего и русского языкознания филологического факультета</p></bio><email>1042250116@rudn.ru</email><xref ref-type="aff" rid="aff2"/></contrib></contrib-group><aff-alternatives id="aff1"><aff><institution xml:lang="en">National Research Tomsk State University</institution></aff><aff><institution xml:lang="ru">Национальный исследовательский Томский государственный университет</institution></aff></aff-alternatives><aff-alternatives id="aff2"><aff><institution xml:lang="en">RUDN University</institution></aff><aff><institution xml:lang="ru">Российский университет дружбы народов</institution></aff></aff-alternatives><pub-date date-type="pub" iso-8601-date="2026-06-17" publication-format="electronic"><day>17</day><month>06</month><year>2026</year></pub-date><volume>3</volume><issue>2</issue><issue-title xml:lang="en"/><issue-title xml:lang="ru"/><fpage>131</fpage><lpage>145</lpage><history><date date-type="received" iso-8601-date="2026-06-18"><day>18</day><month>06</month><year>2026</year></date></history><permissions><copyright-statement xml:lang="en">Copyright © 2025, Pozhidaev M.S., Teplykh E.S., Danilov S.I.</copyright-statement><copyright-statement xml:lang="ru">Copyright © 2025, Пожидаев М.С., Теплых Е.С., Данилов С.И.</copyright-statement><copyright-year>2025</copyright-year><copyright-holder xml:lang="en">Pozhidaev M.S., Teplykh E.S., Danilov S.I.</copyright-holder><copyright-holder xml:lang="ru">Пожидаев М.С., Теплых Е.С., Данилов С.И.</copyright-holder><ali:free_to_read xmlns:ali="http://www.niso.org/schemas/ali/1.0/"/><license><ali:license_ref xmlns:ali="http://www.niso.org/schemas/ali/1.0/">http://creativecommons.org/licenses/by/4.0</ali:license_ref></license></permissions><self-uri xlink:href="https://macrosociolingusictics.ru/MML/article/view/50694">https://macrosociolingusictics.ru/MML/article/view/50694</self-uri><abstract xml:lang="en"><p>The creation of audiobooks in the languages of the peoples 
of Russia using speech synthesizers is a scientifically and socially significant task. The relevance of the research is driven by the development of speech technologies and state policies supporting linguistic diversity, including in the digital space. The study examines a standard algorithm for audiobook creation, distinguishing between invariant and language-specific development stages. The study notes that the main difficulties are associated with the stages requiring linguistic adaptation of the text for speech synthesis: annotation and the expansion of abbreviations and acronyms. For low-resource languages, tasks such as segmentation, tokenization, and contextual annotation, including the processing of homographs and specific phonetic features, pose particular challenges. In conclusion, it is argued that full automation of audiobook creation for the languages of Russia’s peoples using current speech synthesis technology is currently unfeasible. Developing audiobooks in such languages requires the prior creation of specialized linguistic resources. A necessary condition is the formation of a parallel corpus of texts and audio recordings produced by native speakers. Therefore, the successful implementation of such projects demands significant preliminary work on compiling training datasets and adapting algorithms to the specific features of each language.</p></abstract><trans-abstract xml:lang="ru"><p>Создание аудиокниг на языках народов России с применением синтезаторов речи — научно и социально значимая задача. Актуальность исследования обусловлена развитием речевых технологий и государственной политикой поддержки языкового разнообразия, в т. ч. в цифровом пространстве. Рассмотрен типовой алгоритм создания аудиокниги, выделены инвариантные и лингво-специфичные этапы разработки. Отмечено, что основные сложности связаны с этапами, требующими языковой адаптации текста к озвучиванию синтезатором речи: аннотированием, расшифровкой аббревиатур и сокращений. 
Для малоресурсных языков особую проблему представляют задачи сегментации, токенизации и контекстного аннотирования, включая обработку омографов и фонетических особенностей конкретных языков. Сделан вывод о невозможности полной автоматизации процесса создания аудиокниг на языках народов России с использованием синтезаторов речи на данном этапе развития этой технологии. Создание аудиокниг на таких языках требует предварительной разработки специализированных лингвистических ресурсов. Необходимым условием является формирование параллельного корпуса текстов и аудиозаписей, созданных носителями языка. Таким образом, успешная реализация подобных проектов требует значительных предварительных работ по сбору обучающих датасетов и адаптации алгоритмов под специфику конкретного языка.</p></trans-abstract><kwd-group xml:lang="en"><kwd>minority languages</kwd><kwd>low-resource languages</kwd><kwd>machine learning</kwd><kwd>text recognition</kwd><kwd>speech synthesis</kwd><kwd>recurrent neural networks</kwd></kwd-group><kwd-group xml:lang="ru"><kwd>миноритарные языки</kwd><kwd>малоресурсные языки</kwd><kwd>машинное обучение</kwd><kwd>распознавание текста</kwd><kwd>синтезирование речи</kwd><kwd>рекуррентные нейронные сети</kwd></kwd-group><funding-group/></article-meta><fn-group/></front><body></body><back><ref-list><ref id="B1"><label>1.</label><citation-alternatives><mixed-citation xml:lang="en">Alyunina, Yu.M. (2021). «Geometry in Russian»: online course on Russian language for specific purpose. In А.А. Urazbekova, Yu.М. Alyunina, А.S. Vasilieva, V.V. Samsonova, E.S. Sedova, T.A. Sirotina, Modern Russian Language: Functioning and Teaching Problems: Bulletin. XXVI International Scientific and Practical Conference, Budapest, May 14, 2021. Volume 35. [Sovremennyi russkii yazyk: funktsionirovanie i problemy prepodavaniya: Vestnik. XXVI Mezhdunarodnaya nauchno-­prakticheskaya konferentsiya, Budapesht, 14 maya 2021 goda. Tom 35]. 
Budapest: Russian Center for Science and Culture in Budapest Publ. P. 7–17. (In Russ.). EDN: UCTJWX</mixed-citation><mixed-citation xml:lang="ru">Алюнина Ю.М. «Геометрия по-русски»: организация учебного материала в электронном курсе по научному стилю речи // Современный русский язык: функционирование и проблемы преподавания: Вестник. XXVI Международная научно-­практическая конференция, Будапешт, 14 мая 2021 года. Т. 35 / под ред. А.А. Уразбековой, Ю.М. Алюниной, А.С. Васильевой, В.В. Самсоновой, Е.С. Седовой, Т.А. Сиротиной. Будапешт : Российский центр науки и культуры в Будапеште, 2021. С. 7–17. EDN: UCTJWX</mixed-citation></citation-alternatives></ref><ref id="B2"><label>2.</label><citation-alternatives><mixed-citation xml:lang="en">Alyunina, Yu.М. (2025). Tsifrovye tekhnologii v perevode [Digital technologies in translation]. Lan’ Publ. (In Russ.).</mixed-citation><mixed-citation xml:lang="ru">Алюнина Ю.М. Цифровые технологии в переводе. СПб. : Лань, 2025. 144 с.</mixed-citation></citation-alternatives></ref><ref id="B3"><label>3.</label><citation-alternatives><mixed-citation xml:lang="en">Arulprakash, A., Synthiya, M., Vijila, T., &amp; Rajabhusanam, C. (2023). Tamil speech synthesizer app for android: Text processing module enhancement. Indian Journal of Science and Technology, 16(7), 485–491. https://doi.org/10.17485/IJST/v16i7.2165 EDN: ZDIRTC</mixed-citation><mixed-citation xml:lang="ru">Воркунова И.О., Кисиева А.А., Наумова А.А. Редактирование как один из основных этапов составления тифломаршрута // Теория и практика составления тифломаршрутов для навигации лиц с нарушением зрения на станциях метрополитена : монография / под ред. А.В. Козуляева. Казань : Бук, 2025. С. 112–116. EDN: EUWYYO</mixed-citation></citation-alternatives></ref><ref id="B4"><label>4.</label><citation-alternatives><mixed-citation xml:lang="en">Drozashchikh, N.V., &amp; Efimova, E.V. (2025). Lemmatization of low-resource languages in diachronic linguistics: problems and solutions. 
Izvestia: Herzen University Journal of Humanities &amp; Sciences, (217), 302–311. (In Russ.). https://doi.org/10.33910/1992-6464-2025-217-302-311 EDN: AKDLRR</mixed-citation><mixed-citation xml:lang="ru">Дрожащих Н.В., Ефимова Е.В. Лемматизация малоресурсных языков в диахронической лингвистике: проблемы и решения // Известия Российского государственного педагогического университета РГПУ им. А.И. Герцена. 2025. № 217. С. 302–311. https://doi.org/10.33910/1992-6464-2025-217-302-311 EDN: AKDLRR</mixed-citation></citation-alternatives></ref><ref id="B5"><label>5.</label><citation-alternatives><mixed-citation xml:lang="en">Li, N., Liu, S., Liu, Y., Zhao, S., &amp; Liu, M. (2019). Neural speech synthesis with transformer network. Proceedings of the AAAI Conference on Artificial Intelligence, 33(01), 6706–6713. https://doi.org/10.1609/aaai.v33i01.33016706</mixed-citation><mixed-citation xml:lang="ru">Лобарёв Д.С., Лобарёв Н.Д. Синтез недетерминированных конечных автоматов по регулярным выражениям алгоритмом Глушкова в формате JFF // Вестник Полоцкого государственного университета. Серия С. Фундаментальные науки. 2025. № 1 (44). С. 9–13. https://doi.org/10.52928/2070-1624-2025-44-1-9-13 EDN: TEVIHV</mixed-citation></citation-alternatives></ref><ref id="B6"><label>6.</label><citation-alternatives><mixed-citation xml:lang="en">Lobaryov, D.S., &amp; Lobaryov, N.D. (2025). Synthesis of nondeterministic finite automaton from regular expressions by Glushkov’s algorithm in JFF format. Herald of Polotsk State University. Series C. Fundamental Sciences, (1), 9–13. (In Russ.). https://doi.org/10.52928/2070-1624-2025-44-1-9-13 EDN: TEVIHV</mixed-citation><mixed-citation xml:lang="ru">Пунегова Г.В. Тембральные характеристики голоса персонажа (на примере прозаических произведений коми писателей) // Вестник угроведения. 2025. Т. 15. № 1 (60). С. 80–89. 
https://doi.org/10.30624/2220-4156-2025-15-1-80-89 EDN: EMPAHK</mixed-citation></citation-alternatives></ref><ref id="B7"><label>7.</label><citation-alternatives><mixed-citation xml:lang="en">Mache, S.R., Baheti, M.R., &amp; Namrata Mahender, C. (2015). Review on text-to-speech synthesizer. International Journal of Advanced Research in Computer and Communication Engineering, 4(8), 54–59. https://doi.org/10.17148/IJARCCE.2015.4812</mixed-citation><mixed-citation xml:lang="ru">Arulprakash A., Synthiya M., Vijila T., Rajabhusanam C. Tamil speech synthesizer app for android: text processing module enhancement // Indian Journal of Science and Technology. 2023. Vol. 16. № 7. P. 485–491. https://doi.org/10.17485/IJST/v16i7.2165 EDN: ZDIRTC</mixed-citation></citation-alternatives></ref><ref id="B8"><label>8.</label><citation-alternatives><mixed-citation xml:lang="en">Punegova, G.V. (2025). Timbral characteristics of a character’s voice (on the example of prose works by Komi writers). Bulletin of Ugric Studies, 15(1), 80–89. (In Russ.). https://doi.org/10.30624/2220-4156-2025-15-1-80-89 EDN: EMPAHK</mixed-citation><mixed-citation xml:lang="ru">Li N., Liu S., Liu Y., Zhao S., Liu M. Neural speech synthesis with transformer network // Proceedings of the AAAI Conference on Artificial Intelligence. 2019. Vol. 33. № 01. P. 6706–6713. https://doi.org/10.1609/aaai.v33i01.33016706</mixed-citation></citation-alternatives></ref><ref id="B9"><label>9.</label><citation-alternatives><mixed-citation xml:lang="en">Tan, X., Qin, T., Soong, F., &amp; Liu, T.-Y. (2021). A survey on neural speech synthesis. arXiv:2106.15561v3. https://doi.org/10.48550/arXiv.2106.15561</mixed-citation><mixed-citation xml:lang="ru">Mache S.R., Baheti M.R., Namrata Mahender C. Review on text-to-speech synthesizer // International Journal of Advanced Research in Computer and Communication Engineering. 2015. Vol. 4. № 8. P. 54–59. 
https://doi.org/10.17148/IJARCCE.2015.4812</mixed-citation></citation-alternatives></ref><ref id="B10"><label>10.</label><citation-alternatives><mixed-citation xml:lang="en">Tosun, M., &amp; Dincer, K. (2018). Determination of sound transmission loss in lightweight concrete walls and modeling artificial neural network. Selçuk Üniversitesi Mühendislik Bilim Ve Teknoloji Dergisi, 6(3), 461–477. https://doi.org/10.15317/Scitech.2018.145</mixed-citation><mixed-citation xml:lang="ru">Tan X., Qin T., Soong F., Liu T.-Y. A survey on neural speech synthesis // arXiv. 2021. https://doi.org/10.48550/arXiv.2106.15561</mixed-citation></citation-alternatives></ref><ref id="B11"><label>11.</label><citation-alternatives><mixed-citation xml:lang="en">Wang, Y., Skerry-­Ryan, RJ, Stanton, D., Wu, Y., Weiss, R.J., Jaitly, N., Yang, Z., Xiao, Y., Chen, Zh., Bengio, S., Le, Q., Agiomyrgiannakis, Y., Clark, R., &amp; Saurous, R.A. (2017). Tacotron: Towards end-to-end speech synthesis. arXiv:1703.10135. https://doi.org/10.48550/arXiv.1703.10135</mixed-citation><mixed-citation xml:lang="ru">Tosun M., Dincer K. Determination of sound transmission loss in lightweight concrete walls and modeling artificial neural network // Selçuk Üniversitesi Mühendislik Bilim Ve Teknoloji Dergisi. 2018. Vol. 6. № 3. P. 461–477. https://doi.org/10.15317/Scitech.2018.145</mixed-citation></citation-alternatives></ref><ref id="B12"><label>12.</label><citation-alternatives><mixed-citation xml:lang="en">Vorkunova, I.О., Kisieva, А.А., &amp; Naumova, А.А. (2025). Editing as one of the main stages of compiling a typhlo route. In Kozulaev, A.V. Teoriya i praktika sostavleniya tiflomarshrutov dlya navigatsii lits s narusheniem zreniya na stantsiyakh metropolitena [Theory and practice of creating tiflo-­routes for navigation of visually impaired people at metro stations]. Kazan’: Buk Publ. P. 112–116. (In Russ.). 
EDN: EUWYYO</mixed-citation><mixed-citation xml:lang="ru">Wang, Y., Skerry-­Ryan, RJ, Stanton, D., Wu, Y., Weiss, R.J., Jaitly, N., Yang, Z., Xiao, Y., Chen, Zh., Bengio, S., Le, Q., Agiomyrgiannakis, Y., Clark, R., Saurous, R.A. Tacotron: Towards End-to-­End Speech Synthesis // arXiv:1703.10135. 2017. https://doi.org/10.48550/arXiv.1703.10135</mixed-citation></citation-alternatives></ref><ref id="B13"><label>13.</label><citation-alternatives><mixed-citation xml:lang="en">Zheng, Y., Li, X., Xie, F., &amp; Lu, L. (2020). Improving end-to-end speech synthesis with local recurrent neural network enhanced transformer. In ICASSP 2020–2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). Barcelona: ICASSP. P. 6734–6738. https://doi.org/10.1109/ICASSP40776.2020.9054148</mixed-citation><mixed-citation xml:lang="ru">Zheng Y., Li X., Xie F., Lu L. Improving end-to-end speech synthesis with local recurrent neural network enhanced transformer // ICASSP 2020–2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). Barcelona : ICASSP, 2020. P. 6734–6738. https://doi.org/10.1109/ICASSP40776.2020.9054148</mixed-citation></citation-alternatives></ref></ref-list></back></article>
