I am an Assistant Professor of Computer Science at the University of California, Irvine.
Toward building general-purpose embodied intelligence, my research focuses on the intersection of computer vision (perception) and robot learning (action).
Our research spans vision-language-action models, human-to-robot learning, sim-to-real transfer, pre-training strategies for embodied agents, and multi-agent collaboration. We're building systems that enable robots to learn from diverse data sources and work together on complex real-world tasks.
I am extremely fortunate to work with an amazing set of students and collaborators. I'm deeply grateful for their support. We are continuing to grow our group.
@article{lin2025craft,
  title   = {{CRAFT}: A Tendon-Driven Hand with Hybrid Hard-Soft Compliance},
  author  = {Lin, Leo and Patel, Shivansh and Moon, Jay and Lazebnik, Svetlana and Jain, Unnat},
  journal = {Preprint},
  year    = {2026},
}
@inproceedings{routray2026vipra,
  title     = {{ViPRA}: Video Prediction for Robot Actions},
  author    = {Routray, Sandeep and Pan, Hengkai and Jain, Unnat and Bahl, Shikhar and Pathak, Deepak},
  booktitle = {ICLR},
  year      = {2026},
}
@inproceedings{dalal2026attwarp,
  title     = {Constructive Distortion: Improving {MLLMs} with Attention-Aware Image Warping},
  author    = {Dalal, Dwip and Vashishtha, Gautam and Mishra, Utkarsh and Kim, Jeonghwan and
               Kanda, Madhav and Ha, Hyeonjeong and Lazebnik, Svetlana and Ji, Heng and Jain, Unnat},
  booktitle = {ICLR},
  year      = {2026},
}
@inproceedings{patel2026rigvid,
  title     = {Robotic Manipulation by Imitating Generated Videos Without Physical Demonstrations},
  author    = {Patel, Shivansh and Mohan, Shraddhaa and Mai, Hanlin and Jain, Unnat and
               Lazebnik, Svetlana and Li, Yunzhu},
  booktitle = {ICLR},
  year      = {2026},
}
@inproceedings{nguyen2025pit,
  title     = {An Image is Worth More Than 16x16 Patches: Exploring Transformers on Individual Pixels},
  author    = {Nguyen, Duy-Kien and Assran, Mahmoud and Jain, Unnat and Oswald, Martin R. and
               Snoek, Cees G. M. and Chen, Xinlei},
  booktitle = {ICLR},
  year      = {2025},
}
@inproceedings{wasserman2024xgx,
  title     = {Exploitation-Guided Exploration for Semantic Embodied Navigation},
  author    = {Wasserman, Justin and Chowdhary, Girish and Gupta, Abhinav and Jain, Unnat},
  booktitle = {ICRA},
  year      = {2024},
}
Habitat 3.0: A Co-Habitat for Humans, Avatars and Robots
Xavi Puig*, Eric Undersander*, Andrew Szot*, Mikael Cote*, Ruslan Partsey*, Jimmy Yang*, Ruta Desai*, Alexander Clegg*, Michal Hlavac, Tiffany Min, Theo Gervet, Vladimír Vondruš, Vincent-Pierre Berges, John Turner, Oleksandr Maksymets, Zsolt Kira, Mrinal Kalakrishnan, Jitendra Malik, Devendra Chaplot, Unnat Jain, Dhruv Batra, Akshara Rai**, Roozbeh Mottaghi**. ICLR 2024.
Links: paper | project | code | bibtex
Media:
@inproceedings{puig2024habitat3,
  title     = {{Habitat 3.0}: A Co-Habitat for Humans, Avatars and Robots},
  author    = {Puig, Xavi and Undersander, Eric and Szot, Andrew and Cote, Mikael and
               Partsey, Ruslan and Yang, Jimmy and Desai, Ruta and Clegg, Alexander and
               Hlavac, Michal and Min, Tiffany and Gervet, Theo and Vondru{\v{s}}, Vladim{\'i}r and
               Berges, Vincent-Pierre and Turner, John and Maksymets, Oleksandr and
               Kira, Zsolt and Kalakrishnan, Mrinal and Malik, Jitendra and
               Chaplot, Devendra and Jain, Unnat and Batra, Dhruv and
               Rai, Akshara and Mottaghi, Roozbeh},
  booktitle = {ICLR},
  year      = {2024},
}
@inproceedings{dasari2023data4robotics,
  title     = {An Unbiased Look at Datasets for Visuo-Motor Pre-Training},
  author    = {Dasari, Sudeep and Srirama, Mohan Kumar and Jain, Unnat and Gupta, Abhinav},
  booktitle = {CoRL},
  year      = {2023},
}
@inproceedings{patel2023vlamp,
  title     = {Pretrained Language Models as Visual Planners for Human Assistance},
  author    = {Patel, Dhruvesh and Eghbalzadeh, Hamid and Kamra, Nitin and
               Iuzzolino, Michael Louis and Jain, Unnat and Desai, Ruta},
  booktitle = {ICCV},
  year      = {2023},
}
@inproceedings{szot2023zsc,
  title     = {Adaptive Coordination in Social Embodied Rearrangement},
  author    = {Szot, Andrew and Jain, Unnat and Batra, Dhruv and Kira, Zsolt and
               Desai, Ruta and Rai, Akshara},
  booktitle = {ICML},
  year      = {2023},
}
@inproceedings{bahl2023vrb,
  title     = {Affordances from Human Videos as a Versatile Representation for Robotics},
  author    = {Bahl, Shikhar and Mendonca, Russell and Chen, Lili and Jain, Unnat and Pathak, Deepak},
  booktitle = {CVPR},
  year      = {2023},
}
@inproceedings{raychaudhuri2024mopa,
  title     = {{MOPA}: Modular Object Navigation with {PointGoal} Agents},
  author    = {Raychaudhuri, Sonia and Campari, Tommaso and Jain, Unnat and
               Savva, Manolis and Chang, Angel X.},
  booktitle = {WACV},
  year      = {2024},
}
@inproceedings{wasserman2022sling,
  title     = {Last-Mile Embodied Visual Navigation},
  author    = {Wasserman, Justin and Yadav, Karmesh and Chowdhary, Girish and
               Gupta, Abhinav and Jain, Unnat},
  booktitle = {CoRL},
  year      = {2022},
}
@article{deitke2022retrospectives,
  title         = {Retrospectives on the Embodied {AI} Workshop},
  author        = {Deitke, Matt and Batra, Dhruv and Bisk, Yonatan and others},
  journal       = {arXiv preprint arXiv:2210.06849},
  eprint        = {2210.06849},
  archiveprefix = {arXiv},
  year          = {2022},
}
@inproceedings{mittal2022replai,
  title     = {Learning State-Aware Visual Representations from Audible Interactions},
  author    = {Mittal, Himangi and Morgado, Pedro and Jain, Unnat and Gupta, Abhinav},
  booktitle = {NeurIPS},
  year      = {2022},
}
@inproceedings{weihs2021advisor,
  title     = {Bridging the Imitation Gap by Adaptive Insubordination},
  author    = {Weihs, Luca and Jain, Unnat and Liu, Iou-Jen and Salvador, Jordi and
               Lazebnik, Svetlana and Kembhavi, Aniruddha and Schwing, Alexander},
  booktitle = {NeurIPS},
  year      = {2021},
}
@inproceedings{raychaudhuri2021law,
  title     = {Language-Aligned Waypoint ({LAW}) Supervision for Vision-and-Language Navigation
               in Continuous Environments},
  author    = {Raychaudhuri, Sonia and Wani, Saim and Patel, Shivansh and Jain, Unnat and
               Chang, Angel X.},
  booktitle = {EMNLP},
  year      = {2021},
}
@inproceedings{jain2021gridtopix,
  title     = {{GridToPix}: Training Embodied Agents with Minimal Supervision},
  author    = {Jain, Unnat and Liu, Iou-Jen and Lazebnik, Svetlana and Kembhavi, Aniruddha and
               Weihs, Luca and Schwing, Alexander},
  booktitle = {ICCV},
  year      = {2021},
}
@inproceedings{patel2021comon,
  title     = {Interpretation of Emergent Communication in Heterogeneous Collaborative Embodied Agents},
  author    = {Patel, Shivansh and Wani, Saim and Jain, Unnat and Schwing, Alexander and
               Lazebnik, Svetlana and Savva, Manolis and Chang, Angel X.},
  booktitle = {ICCV},
  year      = {2021},
}
@inproceedings{liu2021cmae,
  title     = {Cooperative Exploration for Multi-Agent Deep Reinforcement Learning},
  author    = {Liu, Iou-Jen and Jain, Unnat and Yeh, Raymond and Schwing, Alexander},
  booktitle = {ICML},
  year      = {2021},
}
@inproceedings{wani2020multion,
  title     = {{MultiON}: Benchmarking Semantic Map Memory using Multi-Object Navigation},
  author    = {Wani, Saim and Patel, Shivansh and Jain, Unnat and Chang, Angel X. and
               Savva, Manolis},
  booktitle = {NeurIPS},
  year      = {2020},
}
@article{weihs2020allenact,
  title         = {{AllenAct}: A Framework for Embodied {AI} Research},
  author        = {Weihs, Luca and Salvador, Jordi and Kotar, Klemen and Jain, Unnat and
                   Zeng, Kuo-Hao and Mottaghi, Roozbeh and Kembhavi, Aniruddha},
  journal       = {arXiv preprint arXiv:2008.12760},
  eprint        = {2008.12760},
  archiveprefix = {arXiv},
  year          = {2020},
}
@inproceedings{jain2020cordialsync,
  title     = {A Cordial Sync: Going Beyond Marginal Policies for Multi-Agent Embodied Tasks},
  author    = {Jain, Unnat and Weihs, Luca and Kolve, Eric and Farhadi, Ali and
               Lazebnik, Svetlana and Kembhavi, Aniruddha and Schwing, Alexander},
  booktitle = {ECCV},
  year      = {2020},
}
@inproceedings{chen2020soundspaces,
  title     = {{SoundSpaces}: Audio-Visual Navigation in {3D} Environments},
  author    = {Chen, Changan and Jain, Unnat and Schissler, Carl and
               Gari, Sebastia Vicenc Amengual and Al-Halah, Ziad and Ithapu, Vamsi Krishna and
               Robinson, Philip and Grauman, Kristen},
  booktitle = {ECCV},
  year      = {2020},
}
@inproceedings{lin2019tabvcr,
  title     = {{TAB-VCR}: Tags and Attributes based {VCR} Baselines},
  author    = {Lin, Jingxiang and Jain, Unnat and Schwing, Alexander},
  booktitle = {NeurIPS},
  year      = {2019},
}
@inproceedings{jain2019twobody,
  title     = {Two Body Problem: Collaborative Visual Task Completion},
  author    = {Jain, Unnat and Weihs, Luca and Kolve, Eric and Rastegari, Mohammad and
               Lazebnik, Svetlana and Farhadi, Ali and Schwing, Alexander and Kembhavi, Aniruddha},
  booktitle = {CVPR},
  year      = {2019},
}
@inproceedings{jain2018visualdialog,
  title     = {Two Can Play This Game: Visual Dialog with Discriminative Question Generation and Answering},
  author    = {Jain, Unnat and Lazebnik, Svetlana and Schwing, Alexander},
  booktitle = {CVPR},
  year      = {2018},
}
@inproceedings{jain2017creativity,
  title     = {Creativity: Generating Diverse Questions using Variational Autoencoders},
  author    = {Jain, Unnat and Zhang, Ziyu and Schwing, Alexander},
  booktitle = {CVPR},
  year      = {2017},
}
@inproceedings{jain2017crv,
  title     = {Compact Environment-Invariant Codes for Robust Visual Place Recognition},
  author    = {Jain, Unnat and Namboodiri, Vinay and Pandey, Gaurav},
  booktitle = {CRV},
  year      = {2017},
}