2025
COLING
Linear recency bias during training improves Transformers' fit to reading times
Christian Clark, Byung-Doh Oh, and William Schuler
In Proceedings of the 31st International Conference on Computational Linguistics, 2025
@inproceedings{clarketal25coling,author={Clark, Christian and Oh, Byung-Doh and Schuler, William},title={Linear recency bias during training improves Transformers' fit to reading times},year={2025},booktitle={Proceedings of the 31st International Conference on Computational Linguistics},pages={7735--7747},section={papers},}
2024
arXiv
The impact of token granularity on the predictive power of language model surprisal
Byung-Doh Oh and William Schuler
arXiv, 2024
@article{ohschuler24arxiv,author={Oh, Byung-Doh and Schuler, William},title={The impact of token granularity on the predictive power of language model surprisal},year={2024},journal={arXiv},section={preprints},}
EMNLP
Leading whitespaces of language models’ subword vocabulary pose a confound for calculating word probabilities
Byung-Doh Oh and William Schuler
In Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing, 2024
@inproceedings{ohschuler24emnlp,author={Oh, Byung-Doh and Schuler, William},title={Leading whitespaces of language models' subword vocabulary pose a confound for calculating word probabilities},year={2024},booktitle={Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing},pages={3464--3472},section={papers},}
EACL
Frequency explains the inverse correlation of large language models’ size, training data amount, and surprisal’s fit to reading times
Byung-Doh Oh, Shisen Yue, and William Schuler
In Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics, 2024
@inproceedings{ohetal24eacl,author={Oh, Byung-Doh and Yue, Shisen and Schuler, William},title={Frequency explains the inverse correlation of large language models' size, training data amount, and surprisal's fit to reading times},year={2024},booktitle={Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics},pages={2644--2663},section={papers},}
2023
EMNLP Findings
Transformer-based language model surprisal predicts human reading times best with about two billion training tokens
Byung-Doh Oh and William Schuler
In Findings of the Association for Computational Linguistics: EMNLP 2023, 2023
@inproceedings{ohschuler23emnlp,author={Oh, Byung-Doh and Schuler, William},title={Transformer-based language model surprisal predicts human reading times best with about two billion training tokens},year={2023},booktitle={Findings of the Association for Computational Linguistics: EMNLP 2023},pages={1915--1921},section={papers},}
ACL
Token-wise decomposition of autoregressive language model hidden states for analyzing model predictions
Byung-Doh Oh and William Schuler
In Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics, 2023
@inproceedings{ohschuler23acl,author={Oh, Byung-Doh and Schuler, William},title={Token-wise decomposition of autoregressive language model hidden states for analyzing model predictions},year={2023},booktitle={Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics},pages={10105--10117},section={papers},}
TACL
Why does surprisal from larger Transformer-based language models provide a poorer fit to human reading times?
Byung-Doh Oh and William Schuler
Transactions of the Association for Computational Linguistics, 2023
@article{ohschuler23tacl,author={Oh, Byung-Doh and Schuler, William},title={Why does surprisal from larger Transformer-based language models provide a poorer fit to human reading times?},year={2023},journal={Transactions of the Association for Computational Linguistics},volume={11},pages={336--350},section={articles},}
HSP
On the bigger-is-worse nature of pre-trained language model surprisal
Byung-Doh Oh and William Schuler
In 36th Annual Conference on Human Sentence Processing, 2023
@inproceedings{ohschuler23hspsurp,author={Oh, Byung-Doh and Schuler, William},title={On the bigger-is-worse nature of pre-trained language model surprisal},year={2023},booktitle={36th Annual Conference on Human Sentence Processing},section={abstracts},}
HSP
Memory-based predictors from GPT-2 attention predict reading times over surprisal
Byung-Doh Oh and William Schuler
In 36th Annual Conference on Human Sentence Processing, 2023
@inproceedings{ohschuler23hspattn,author={Oh, Byung-Doh and Schuler, William},title={Memory-based predictors from GPT-2 attention predict reading times over surprisal},year={2023},booktitle={36th Annual Conference on Human Sentence Processing},section={abstracts},}
2022
EMNLP
Entropy- and distance-based predictors from GPT-2 attention patterns predict reading times over and above GPT-2 surprisal
Byung-Doh Oh and William Schuler
In Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing, 2022
@inproceedings{ohschuler22emnlp,author={Oh, Byung-Doh and Schuler, William},title={Entropy- and distance-based predictors from GPT-2 attention patterns predict reading times over and above GPT-2 surprisal},year={2022},booktitle={Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing},pages={9324--9334},section={papers},}
DARPA Risers
Unified unsupervised grammar induction for typologically diverse languages
Byung-Doh Oh
In DARPA Risers, 2022
@inproceedings{oh22darpa,author={Oh, Byung-Doh},title={Unified unsupervised grammar induction for typologically diverse languages},year={2022},booktitle={DARPA Risers},section={abstracts},}
FAI
Comparison of structural parsers and neural language models as surprisal estimators
Byung-Doh Oh, Christian Clark, and William Schuler
Frontiers in Artificial Intelligence, 2022
@article{ohetal22fai,author={Oh, Byung-Doh and Clark, Christian and Schuler, William},title={Comparison of structural parsers and neural language models as surprisal estimators},year={2022},journal={Frontiers in Artificial Intelligence},volume={5},pages={777963},section={articles},}
2021
EMNLP Findings
Character-based PCFG induction for modeling the syntactic acquisition of morphologically rich languages
Lifeng Jin, Byung-Doh Oh, and William Schuler
In Findings of the Association for Computational Linguistics: EMNLP 2021, 2021
@inproceedings{jinetal21emnlp,author={Jin, Lifeng and Oh, Byung-Doh and Schuler, William},title={Character-based PCFG induction for modeling the syntactic acquisition of morphologically rich languages},year={2021},booktitle={Findings of the Association for Computational Linguistics: EMNLP 2021},pages={4367--4378},section={papers},}
EMNLP Findings
Coreference-aware surprisal predicts brain response
Evan Jaffe, Byung-Doh Oh, and William Schuler
In Findings of the Association for Computational Linguistics: EMNLP 2021, 2021
@inproceedings{jaffeetal21emnlp,author={Jaffe, Evan and Oh, Byung-Doh and Schuler, William},title={Coreference-aware surprisal predicts brain response},year={2021},booktitle={Findings of the Association for Computational Linguistics: EMNLP 2021},pages={3351--3356},section={papers},}
ACL
Surprisal estimators for human reading times need character models
Byung-Doh Oh, Christian Clark, and William Schuler
In Proceedings of the Joint Conference of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing, 2021
@inproceedings{ohetal21acl,author={Oh, Byung-Doh and Clark, Christian and Schuler, William},title={Surprisal estimators for human reading times need character models},year={2021},booktitle={Proceedings of the Joint Conference of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing},pages={3746--3757},section={papers},}
CMCL
Contributions of propositional content and syntactic category information in sentence processing
Byung-Doh Oh and William Schuler
In Proceedings of the 11th Workshop on Cognitive Modeling and Computational Linguistics, 2021
@inproceedings{ohschuler21cmcl,author={Oh, Byung-Doh and Schuler, William},title={Contributions of propositional content and syntactic category information in sentence processing},year={2021},booktitle={Proceedings of the 11th Workshop on Cognitive Modeling and Computational Linguistics},pages={241--250},section={papers},}
CMCL
Team Ohio State at CMCL 2021 shared task: Fine-tuned RoBERTa for eye-tracking data prediction
Byung-Doh Oh
In Proceedings of the 11th Workshop on Cognitive Modeling and Computational Linguistics, 2021
@inproceedings{oh21cmcl,author={Oh, Byung-Doh},title={Team Ohio State at CMCL 2021 shared task: Fine-tuned RoBERTa for eye-tracking data prediction},year={2021},booktitle={Proceedings of the 11th Workshop on Cognitive Modeling and Computational Linguistics},pages={97--101},section={papers},}
CUNY
Comparison of structural and neural language models as surprisal estimators
Byung-Doh Oh, Christian Clark, and William Schuler
In 34th Annual CUNY Conference on Human Sentence Processing, 2021
@inproceedings{ohetal21hspsurp,author={Oh, Byung-Doh and Clark, Christian and Schuler, William},title={Comparison of structural and neural language models as surprisal estimators},year={2021},booktitle={34th Annual CUNY Conference on Human Sentence Processing},section={abstracts},}
CUNY
Contributions of propositional content and syntactic categories in sentence processing
Byung-Doh Oh and William Schuler
In 34th Annual CUNY Conference on Human Sentence Processing, 2021
@inproceedings{ohetal21hspconcat,author={Oh, Byung-Doh and Schuler, William},title={Contributions of propositional content and syntactic categories in sentence processing},year={2021},booktitle={34th Annual CUNY Conference on Human Sentence Processing},section={abstracts},}
2019
SIGMORPHON
THOMAS: The hegemonic OSU morphological analyzer using seq2seq
Byung-Doh Oh, Pranav Maneriker, and Nanjiang Jiang
In Proceedings of the 16th Workshop on Computational Research in Phonetics, Phonology, and Morphology, 2019
@inproceedings{ohetal19sigmorphon,author={Oh, Byung-Doh and Maneriker, Pranav and Jiang, Nanjiang},title={THOMAS: The hegemonic OSU morphological analyzer using seq2seq},year={2019},booktitle={Proceedings of the 16th Workshop on Computational Research in Phonetics, Phonology, and Morphology},pages={80--86},section={abstracts},}
JLM
Modeling morphological learning, typology, and change: What can the neural sequence-to-sequence framework contribute?
Micha Elsner, Andrea D. Sims, Alexander Erdmann, and 15 more authors
Journal of Language Modelling, 2019
@article{elsneretal19jlm,author={Elsner, Micha and Sims, Andrea D. and Erdmann, Alexander and Hernandez, Antonio and Jaffe, Evan and Jin, Lifeng and Johnson, Martha Booker and Karim, Shuan and King, David L. and Lamberti Nunes, Luana and Oh, Byung-Doh and Rasmussen, Nathan and Shain, Cory and Antetomaso, Stephanie and Dickinson, Kendra V. and Diewald, Noah and McKenzie, Michelle and Stevens-Guille, Symon},title={Modeling morphological learning, typology, and change: What can the neural sequence-to-sequence framework contribute?},year={2019},journal={Journal of Language Modelling},volume={7},number={1},pages={53--98},section={articles},}
AIMM
The role of learnability in morphological change: A computational approach
Evan Jaffe and Byung-Doh Oh
In Fourth American International Morphology Meeting, 2019
@inproceedings{jaffeoh19aimm,author={Jaffe, Evan and Oh, Byung-Doh},title={The role of learnability in morphological change: A computational approach},year={2019},booktitle={Fourth American International Morphology Meeting},section={abstracts},}
2018
Eng Tea
Exploring English online research and comprehension strategies of Korean college students
Byung-Doh Oh and Youngsoon So
English Teaching, 2018
@article{ohso18et,author={Oh, Byung-Doh and So, Youngsoon},title={Exploring English online research and comprehension strategies of Korean college students},year={2018},journal={English Teaching},volume={73},number={3},pages={53--76},section={articles},}
2017
FLER
Predicting L2 writing proficiency with computational indices based on n-grams
Byung-Doh Oh
Foreign Language Education Research, 2017
@article{oh17fler,author={Oh, Byung-Doh},title={Predicting L2 writing proficiency with computational indices based on n-grams},year={2017},journal={Foreign Language Education Research},volume={21},pages={1--20},section={articles},}