[
    {
        "id": "1208",
        "title": "Creative Momentum Transfer: How Timing and Labeling of AI Suggestions Shape Iterative Human Ideation",
        "authors": "Guangrui Fan, Dandan Liu, Lihu Pan, Yishan Huang",
        "abstract": "Human–AI collaboration is increasingly integral to a variety of domains where creative ideation unfolds in iterative cycles, yet most existing studies evaluate AI-generated concepts in a single step. This paper addresses the gap by investigating “Creative Momentum Transfer”—how the timing (early vs. late) and labeling (AI-labeled vs. unlabeled) of AI prompts shape multi-round human ideation. In a between-subjects experiment (N = 247), participants proposed solutions for plastic pollution over two rounds, with AI suggestions introduced either at the outset or mid-process and labeled explicitly or not. Results reveal that early AI prompts increase overall creativity but induce stronger anchoring, whereas late AI prompts trigger a mid-round pivot that fosters more divergent thinking yet still boosts final outcomes compared to a no-AI control. Labeling amplifies both subjective and objective adoption of AI ideas, although most participants could detect AI sources even when unlabeled. Furthermore, qualitative interviews highlight nuanced perspectives on perceived ownership, authenticity, and the ways in which labeling triggers deeper scrutiny of the AI’s style. By demonstrating that baseline creativity moderates these effects more robustly than trust in AI, this study advances our theoretical understanding of multi-round human–AI synergy while offering design guidelines for next-generation creativity support systems. We discuss how user-centered design can balance rapid convergence (via early AI) with strategic pivot opportunities (via late AI) and weigh transparent labeling against ethical considerations of authorship and user autonomy.",
        "location": "Guangzhou",
        "day": "August 31st",
        "hour": "11:00",
        "session": "Humans and AI",
        "repo": "https:\/\/github.com\/GerryFAN0706\/CreativeMomentumTransfer"
    },
    {
        "id": "8556",
        "title": "Explainability Through Human-Centric Design for XAI in Lung Cancer Detection",
        "authors": "Amy Rafferty, Rishi Ramaesh, Ajitha Rajan",
        "abstract": "Deep learning models have shown promise in lung pathology detection from chest X-rays, but widespread clinical adoption remains limited due to opaque model decision-making. In prior work, we introduced ClinicXAI, a human-centric, expert-guided concept bottleneck model (CBM) designed for interpretable lung cancer diagnosis. We now extend that approach and present XpertXAI, a generalizable expert-driven model that preserves human-interpretable clinical concepts while scaling to detect multiple lung pathologies. Using a high-performing InceptionV3-based classifier and a public dataset of chest X-rays with radiology reports, we compare XpertXAI against leading post-hoc explainability methods and an unsupervised CBM, XCBs. We assess explanations through comparison with expert radiologist annotations and medical ground truth. Although XpertXAI is trained for multiple pathologies, our expert validation focuses on lung cancer. We find that existing techniques frequently fail to produce clinically meaningful explanations, omitting key diagnostic features and disagreeing with radiologist judgments. XpertXAI not only outperforms these baselines in predictive accuracy but also delivers concept-level explanations that better align with expert reasoning. While our focus remains on explainability in lung cancer detection, this work illustrates how human-centric model design can be effectively extended to broader diagnostic contexts — offering a scalable path toward clinically meaningful explainable AI in medical diagnostics.",
        "location": "Montreal",
        "day": "August 21st",
        "hour": "15:00",
        "session": "Human Centred AI (2\/2)",
        "repo": "https:\/\/github.com\/AmyRaff\/concept-explanations",
        "poster_positions": "From board n131 to board n136"
    },
    {
        "id": "8914",
        "title": "HCRide: Harmonizing Passenger Fairness and Driver Preference for Human-Centered Ride-Hailing",
        "authors": "Lin Jiang, Yu Yang, Guang Wang",
        "abstract": "Order dispatch systems play a vital role in ride-hailing services, which directly influence operator revenue, driver profit, and passenger experience. Most existing work focuses on improving system efficiency in terms of operator revenue, which may cause a bad experience for both passengers and drivers. Hence, in this work, we aim to design a human-centered ride-hailing system by considering both passenger fairness and driver preference without compromising the overall system efficiency. However, it is nontrivial to achieve this target due to the potential conflicts between passenger fairness and driver preference since optimizing one may sacrifice the other. To address this challenge, we design HCRide, a Human-Centered Ride-hailing system based on a novel multi-agent reinforcement learning algorithm called Harmonization-oriented Actor-Bi-Critic (Habic), which includes three major components (i.e., a multi-agent competition mechanism, a dynamic Actor network, and a Bi-Critic network) to optimize system efficiency and passenger fairness with driver preference consideration. We extensively evaluate our HCRide using two real-world ride-hailing datasets from Shenzhen and New York City. Experimental results show our HCRide effectively improves system efficiency by 2.02%, fairness by 5.39%, and driver preference by 10.21% compared to state-of-the-art baselines.",
        "location": "Guangzhou",
        "day": "August 31st",
        "hour": "11:00",
        "session": "Humans and AI",
        "repo": "https:\/\/github.com\/LinJiang18\/HCRide"
    },
    {
        "id": "8844",
        "title": "The Delta of Thought: Channeling Rivers of Commonsense Knowledge in the Sea of Metaphorical Interpretations",
        "authors": "Antonio Lieto, Gian Luca Pozzato, Stefano Zoia",
        "abstract": "We propose a system called METCL (Metaphor Elaboration in Typicality-Based Compositional Logic) able to generate and identify metaphors by using the TCL reasoning framework, specialized in human-like commonsense concept combination. We show that METCL is able to improve both state-of-the-art Large Language Models (e.g. DeepSeek-R1, GPT-4o, Qwen2.5-Max) and symbolic ones in the task of metaphor identification. Additionally, we show how the metaphors generated by METCL are generally well accepted by human subjects. The obtained results are encouraging and pave the way to research in automatic metaphor generation and comprehension based on the assumption that metaphor interpretation can be partially regarded as a categorization problem relying on generative commonsense concept combination.",
        "location": "Montreal",
        "day": "August 21st",
        "hour": "15:00",
        "session": "Human Centred AI (2\/2)",
        "repo": "https:\/\/github.com\/StefanoZoia\/METCL",
        "poster_positions": "From board n131 to board n136"
    },
    {
        "id": "9184",
        "title": "Enhancing Automated Grading in Science Education through LLM-Driven Causal Reasoning and Multimodal Analysis",
        "authors": "Haohao Zhu, Tingting Li, Peng He, Jiayu Zhou",
        "abstract": "Automated assessment of open responses in K–12 science education poses significant challenges due to the multimodal nature of student work, which often integrates textual explanations, drawings, and handwritten elements. Traditional evaluation methods that focus solely on textual analysis fail to capture the full breadth of student reasoning and are susceptible to biases such as handwriting neatness or answer length. In this paper, we propose a novel LLM-augmented multimodal evaluation framework that addresses these limitations through a comprehensive, bias-corrected grading system. Our approach leverages LLMs to generate causal knowledge graphs that encapsulate the essential conceptual relationships in student responses, comparing these graphs with those derived automatically from the rubrics and submissions. Experimental results demonstrate that our framework improves grading accuracy and consistency over deep supervised learning and few-shot LLM baselines.",
        "location": "Montreal",
        "day": "August 21st",
        "hour": "11:30",
        "session": "Human Centred AI (1\/2)",
        "poster_positions": "From board n137 to board n138"
    },
    {
        "id": "8947",
        "title": "Shaping Shared Languages: Human and Large Language Models' Inductive Biases in Emergent Communication",
        "authors": "Tom Kouwenhoven, Max Peeperkorn, Roy de Kleijn, Tessa Verhoef",
        "abstract": "Languages are shaped by the inductive biases of their users. Using a classical referential game, we investigate how artificial languages evolve when optimised for inductive biases in humans and large language models (LLMs) via Human-Human, LLM-LLM and Human-LLM experiments. We show that referentially grounded vocabularies emerge that enable reliable communication in all conditions, even when humans and LLMs collaborate. Comparisons between conditions reveal that languages optimised for LLMs subtly differ from those optimised for humans. Interestingly, interactions between humans and LLMs alleviate these differences and result in vocabularies more human-like than LLM-like. These findings advance our understanding of the role inductive biases in LLMs play in the dynamic nature of human language and contribute to maintaining alignment in human and machine communication. In particular, our work underscores the need to think of new LLM training methods that include human interaction and shows that using communicative success as a reward signal can be a fruitful, novel direction.",
        "location": "Montreal",
        "day": "August 21st",
        "hour": "11:30",
        "session": "Human Centred AI (1\/2)",
        "repo": "https:\/\/osf.io\/ep6mw\/?view_only=8e145c5438bb4dae91f21e126dad55a8",
        "poster_positions": "From board n137 to board n138"
    },
    {
        "id": "8796",
        "title": "Reflective Verbal Reward Design for Pluralistic Alignment",
        "authors": "Carter Blair, Kate Larson, Edith Law",
        "abstract": "AI agents are commonly aligned with \"human values\" through reinforcement learning from human feedback (RLHF), where a single reward model is learned from aggregated human feedback and used to align an agent's behavior. However, human values are not homogeneous--different people hold distinct and sometimes conflicting values. Aggregating feedback into a single reward model risks disproportionately suppressing minority preferences. To address this, we present a novel reward modeling approach for learning individualized reward models. Our approach uses a language model to guide users through reflective dialogues where they critique agent behavior and construct their preferences. This personalized dialogue history, containing the user's reflections and critiqued examples, is then used as context for another language model that serves as an individualized reward function (what we call a \"verbal reward model\") for evaluating new trajectories. In studies with 30 participants, our method achieved a 9-12% improvement in accuracy over non-reflective verbal reward models while being more sample efficient than traditional supervised learning methods.",
        "location": "Montreal",
        "day": "August 21st",
        "hour": "15:00",
        "session": "Human Centred AI (2\/2)",
        "repo": "https:\/\/osf.io\/8yxf2\/",
        "poster_positions": "From board n131 to board n136"
    },
    {
        "id": "8697",
        "title": "Hand by Hand: LLM Driving EMS Assistant for Operational Skill Learning",
        "authors": "Wei Xiang, Ziyue Lei, Haoyuan Che, Fangyuan Ye, Xueting Wu, Lingyun Sun",
        "abstract": "Operational skill learning, inherently physical and reliant on hands-on practice and kinesthetic feedback, has yet to be effectively replicated in large language model (LLM)-supported training. Current LLM training assistants primarily generate customized textual feedback, neglecting the crucial kinesthetic modality. This gap derives from the textual and uncertain nature of LLMs, compounded by concerns about user acceptance of LLM-driven body control. To bridge this gap and realize the potential of collaborative human-LLM action, this work explores human experience of LLM-driven kinesthetic assistance. Specifically, we introduced an \"Align-Analyze-Adjust\" strategy and developed FlightAxis, a tool that integrates LLM with Electrical Muscle Stimulation (EMS) for flight skill acquisition, a representative operational skill domain. FlightAxis learns flight skills from manuals and guides forearm movements during simulated flight tasks. Our results demonstrate high user acceptance of LLM-mediated body control and significantly reduced task completion times. Crucially, trainees reported that this kinesthetic assistance enhanced their awareness of operation flaws and fostered increased engagement in the training process, rather than relieving perceived load. This work demonstrated the potential of kinesthetic LLM training in operational skill acquisition.",
        "location": "Guangzhou",
        "day": "August 31st",
        "hour": "11:00",
        "session": "Humans and AI",
        "repo": "https:\/\/github.com\/Z500-RAY\/LLM-Driving-EMS-Assistant"
    },
    {
        "id": "8558",
        "title": "Explainable Automatic Fact-Checking for Journalists Augmentation in the Wild",
        "authors": "Filipe Altoe, Sérgio Miguel Gonçalves Pinto, H Sofia Pinto",
        "abstract": "Journalistic manual fact-checking is the usual way to address fake news; however, this labor-intensive task regularly is not a match for the scale of the problem. The literature introduced automated fact-checking (AFC) as a potential solution; however, there is still missing functionality in the AFC pipeline, a lack of research benchmarking data, and a disconnect between their design and human factors crucial for adoption. We present a fully explainable AFC framework designed to augment professional journalists in the wild. A novel human annotation-free approach surpasses state-of-the-art multi-label classification by 12%. It is the first to demonstrate strong generalization across different claim subjects without retraining and to generate complete verdict explanation articles and their summaries. A focused user study of 103 professional journalists, with 93% having dedicated experience with fact-checking, validates the framework's level of explainability, transparency, and quality of generated fact-checking artifacts. The importance of establishing clear source selection and bias evaluation criteria reinforced the need for human augmentation, not replacement, by AFC systems.",
        "location": "Montreal",
        "day": "August 21st",
        "hour": "15:00",
        "session": "Human Centred AI (2\/2)",
        "repo": "https:\/\/github.com\/filipealtoe\/Automatic-evidence-based-explanation",
        "poster_positions": "From board n131 to board n136"
    },
    {
        "id": "8757",
        "title": "Toward Informed AV Decision-Making: Computational Model of Well-being and Trust in Mobility",
        "authors": "Zahra Zahedi, Shashank Mehrotra, Teruhisa Misu, Kumar Akash",
        "abstract": "For future human-autonomous vehicle (AV) interactions to be effective and smooth, human-aware systems that analyze and align human needs with automation decisions are essential. Achieving this requires systems that account for human cognitive states. We present a novel computational model in the form of a Dynamic Bayesian Network (DBN) that infers the cognitive states of both AV users and other road users, integrating this information into the AV's decision-making process. Specifically, our model captures the “well-being” of both an AV user and an interacting road user as cognitive states alongside trust. Our DBN models infer beliefs over the AV user’s evolving well-being, trust, and intention states, as well as the possible well-being of other road users, based on observed interaction experiences. Using data collected from an interaction study, we refine the model parameters and empirically assess its performance. Finally, we extend our model into a causal inference model (CIM) framework for AV decision-making, enabling the AV to enhance user well-being and trust while balancing these factors with its own operational costs and the well-being of interacting road users. Our evaluation demonstrates the model’s effectiveness in accurately predicting user's states and guiding informed, human-centered AV decisions.",
        "location": "Montreal",
        "day": "August 21st",
        "hour": "15:00",
        "session": "Human Centred AI (2\/2)",
        "repo": "https:\/\/github.com\/honda-research-institute\/wellbeing-trust-model",
        "poster_positions": "From board n131 to board n136"
    },
    {
        "id": "8460",
        "title": "LivePoem: Improving the Learning Experience of Classical Chinese Poetry with AI-Generated Musical Storyboards",
        "authors": "Qihao Liang, Xichu Ma, Torin Hopkins, Ye Wang",
        "abstract": "Textbook reading has long dominated classical poetry education in Chinese-speaking communities. However, research has shown that extensive text-based learning can lead to learner disengagement and a less pleasant experience. This paper aims to improve the experience of classical Chinese poetry learning by introducing LivePoem—a system that generates musical storyboards (storyboards with background music) as audiovisual aids to support poetry comprehension. We employ a pre-trained diffusion model for storyboard generation and train a prosody-based poem-to-melody generator using a Transformer model, both validated by standard objective metrics to ensure generation quality. Through a within-subjects study involving 25 non-native Chinese learners, we compared learning outcomes from textbook reading and musical storyboard viewing through standardised reading comprehension tests. Additionally, the learning experience was assessed by the Self-Assessment Manikin (SAM) and an inductive thematic analysis of learners' open-ended feedback. Experimental results show that musical storyboards retained the learning outcomes of textbooks, while more effectively engaging learners and providing a more pleasant learning experience.",
        "location": "Montreal",
        "day": "August 21st",
        "hour": "15:00",
        "session": "Human Centred AI (2\/2)",
        "repo": "https:\/\/github.com\/lqhac\/LivePoem",
        "poster_positions": "From board n131 to board n136"
    }
]