[
    {
        "id": "DM4",
        "title": "GE-Chat: A Graph Enhanced RAG Framework for Evidential Response Generation of LLMs",
        "authors": "Longchao Da, Parth Mitesh Shah, Kuan-Ru Liou, Jiaxing Zhang, Hua Wei",
        "abstract": "Large Language Models (LLMs) have become integral to human decision-making processes. However, their outputs are not always reliable, often requiring users to assess the accuracy of the information provided manually. This issue is exacerbated by hallucinated responses, which are frequently presented with convincing but incorrect explanations, leading to trust concerns among users. To address this challenge, we propose GE-Chat, a knowledge Graph-enhanced retrieval-augmented generation framework designed to deliver Evidence-based responses. Specifically, when users upload a document, GE-Chat constructs a knowledge graph to support a retrieval-augmented agent, enriching the agent's responses with external knowledge beyond its training data. We further incorporate Chain-of-Thought (CoT) reasoning, n-hop subgraph searching, and entailment-based sentence generation to ensure accurate evidence retrieval. Experimental results demonstrate that our approach improves the ability of existing models to identify precise evidence in free-form contexts, offering a reliable mechanism for verifying LLM-generated conclusions and enhancing trustworthiness.",
        "location": "Guangzhou",
        "day": "August 29th",
        "hour": "17:15",
        "session": "DEMOS1.1"
    },
    {
        "id": "DM6",
        "title": "SPARC: An AI-Based Speech Processing and Real-Time Correction System",
        "authors": "TingRay Chung, Pin-Yu Chen",
        "abstract": "In the world of audio narration and video production, maintaining clear and accurate dialogue is crucial.  However, most work done in dubbing mistakes is done in post-production which is often not applicable to live broadcasts. This project aims to develop a real-time voice correction system that automatically detects and corrects speech errors in near real-time while integrating the adjusted audio into ongoing conversations without disrupting the natural flow. This paper utilizes various AI tools like the Nous Hermes 2-Mistral 7B DPO large language model to first generate the reference script for Coqui's XTTS-V2 zero-shot text-to-speech voice cloning model. After the correction is generated, it goes through a series of filters to replace the mistake and seamlessly integrates it. The experiment's user survey demonstrates that the corrected audio is of high quality.",
        "location": "Montreal",
        "day": "August 20th",
        "hour": "11:30",
        "session": "DEMOS 5",
        "poster_positions": "From board n120 to board n123"
    },
    {
        "id": "DM8",
        "title": "ASP Chef Chats with Large Language Models",
        "authors": "Mario Alviano, Pietro Macrì, Luis Angel Rodriguez Reiners",
        "abstract": "ASP Chef enriches Answer Set Programming (ASP) with the notion of recipe, that is, a sequence of operations on answer sets.\r\nRecipes are designed and executed in modern browsers, and further improve the fast prototyping capabilities of ASP.\r\nThis paper introduces new operations designed to integrate Large Language Models (LLMs) in recipe, with the aim of combining the reasoning strength of ASP with the natural language capabilities of LLMs, to enable more interactive and adaptive problem-solving workflows.\r\nIn a nutshell, answer sets in input are transformed into prompts for LLMs, whose responses are processed to extract facts for subsequent operations within the recipe.",
        "location": "Montreal",
        "day": "August 20th",
        "hour": "11:30",
        "session": "DEMOS 2",
        "poster_positions": "From board n109 to board n112"
    },
    {
        "id": "DM13",
        "title": "How to Make Reproducible Research in Machine Unlearning with ERASURE",
        "authors": "Andrea D'Angelo, Claudio Savelli, Gabriele Tagliente, Flavio Giobergia, Elena Baralis, Giovanni Stilo",
        "abstract": "Machine unlearning, the process of removing specific data influences from Machine Learning models, is critical for complying with regulations like the GDPR's right to be forgotten and addressing copyright disputes in large models. Despite its rising importance, the field still lacks standardized tools, hindering reproducibility and evaluation. Here, we present, in an extensive way, ERASURE,  a unified framework enabling reproducibility by implementing common unlearning techniques, evaluation metrics, and dedicated datasets.\r\nERASURE advances research, ensures solution comparability, and facilitates reproducibility, addressing future legal and ethical challenges in data management.",
        "location": "Montreal",
        "day": "August 20th",
        "hour": "11:30",
        "session": "DEMOS 6",
        "poster_positions": "From board n124 to board n126"
    },
    {
        "id": "DM14",
        "title": "DejAIvu: Identifying and Explaining AI Art on the Web in Real-Time with Saliency Maps",
        "authors": "Jocelyn Dzuong",
        "abstract": "The recent surge in advanced generative models, such as diffusion models and generative adversarial networks (GANs), has led to an alarming rise in AI-generated images across various domains on the web. While such technologies offer benefits such as democratizing artistic creation, they also pose challenges in misinformation, digital forgery, and authenticity verification. Moreover, the unattributed use of AI-generated images in media and marketing has provoked significant backlash from online users today. In response to this, we introduce DejAIvu, a Chrome Web extension that combines real-time AI-generated image detection with saliency-based explainability while users browse the web. Using an ONNX-optimized deep learning model, DejAIvu automatically analyzes images on websites such as Google Images, identifies AI-generated content using model inference, and overlays a saliency heatmap to highlight AI-related artifacts. Our approach integrates efficient in-browser inference, gradient-based saliency analysis, and a seamless user experience, ensuring that AI detection is both transparent and interpretable. We also evaluate DejAIvu across multiple pretrained architectures and benchmark datasets, demonstrating high accuracy and low latency, making it a practical and deployable tool for enhancing AI image accountability. The code for this system can be found at https:\/\/github.com\/Noodulz\/dejAIvu.",
        "location": "",
        "day": "",
        "hour": "",
        "session": ""
    },
    {
        "id": "DM21",
        "title": "Search Swarm: Multiagent Large Language Models Framework for E-commerce Product Search",
        "authors": "Nagim Isyanbaev, Ilya Makarov",
        "abstract": "Search engines are vital for online e-commerce but often struggle with long, detailed queries. We introduce Search Swarm, a novel multi-agent system designed to improve search engine navigation on platforms like Amazon by accurately locating relevant products based on user instructions. Search Swarm employs multiple large language model (LLM) agents, each with a specific role: query planner, searcher, critic, and attribute selector. These agents collaborate to generate search queries, evaluate results, and identify the best product options tailored to users' needs. Our framework outperforms existing methods like ReAct and Reflexion in the WebShop environment, achieving a reward score of 62.64, compared to scores of 54.1, 59.8, 61.5, and 58.2 for other approaches. Furthermore, in a comparison with a basic rule-based method on Amazon, Search Swarm achieved a score 38.71 points higher and a 41% greater success rate, demonstrating its superior ability to provide relevant product matches over traditional search engines.",
        "location": "Montreal",
        "day": "August 20th",
        "hour": "11:30",
        "session": "DEMOS 1",
        "poster_positions": "From board n105 to board n108"
    },
    {
        "id": "DM23",
        "title": "RobustX: Robust Counterfactual Explanations Made Easy",
        "authors": "Junqi Jiang, Luca Marzari, Aaryan Purohit, Francesco Leofante",
        "abstract": "The increasing use of Machine Learning (ML) models to aid decision-making in high-stakes industries demands explainability to facilitate trust. Counterfactual Explanations (CEs) are ideally suited for this, as they can offer insights into the predictions of an ML model by illustrating how changes in its input data may lead to different outcomes. However, for CEs to realise their explanatory potential, significant challenges remain in ensuring their robustness under slight changes in the scenario being explained. Despite the widespread recognition of CEs' robustness as a fundamental requirement, a lack of standardised tools and benchmarks hinders a comprehensive and effective comparison of robust CE generation methods. In this paper, we introduce RobustX, an open-source Python library implementing a collection of CE generation and evaluation methods, with a focus on the robustness property. RobustX provides interfaces to several existing methods from the literature, enabling streamlined access to state-of-the-art techniques. The library is also easily extensible, allowing fast prototyping of novel robust CE generation and evaluation methods.",
        "location": "Montreal",
        "day": "August 20th",
        "hour": "11:30",
        "session": "DEMOS 4",
        "poster_positions": "From board n117 to board n119"
    },
    {
        "id": "DM24",
        "title": "Combining Code Generating Large Language Models and Self-Play to Iteratively Refine Strategies in Games",
        "authors": "Yoram Bachrach, Edan Toledo, Karen Hambardzumyan, Despoina Magka, Martin Josifoski, Minqi Jiang, Jakob Foerster, Roberta Raileanu, Tatiana Shavrina, Nicola Cancedda, Avraham Ruderman, Katie Millican, Andrei Lupu, Rishi Hazra",
        "abstract": "We propose a self-play approach to generating strategies for playing in multi-player games, where strategies are represented as computer code. We use large language models (LLMs) to generate pieces of code to play in the game, which we refer to as generated bots. We engage the LLM generated bots in competitions, designed to generate increasingly stronger strategies. We follow game theoretic principles in organizing these tournaments, and use a Policy Space Response Oracle (PSRO) approach. We start with an initial set of LLM generated bots, and continue in rounds for adding new bots into the population. Each round adds a bot to the population by asking the LLM to produce code for playing against a bot representing the Nash equilibrium mixture over the current population. Our analysis shows that even a few rounds are sufficient to produce strong bots for playing the game. Our demo shows the process for the game of Checkers. We allow users to select initial bots in the population, run the process, inspect how the bots evolve over time, and play against the generated bots.",
        "location": "Montreal",
        "day": "August 20th",
        "hour": "11:30",
        "session": "DEMOS 4",
        "poster_positions": "From board n117 to board n119"
    },
    {
        "id": "DM25",
        "title": "OpenIAI-SNIO: A Systematic AR-Based Assembly Guidance System for Small-Scale, High-Density Industrial Components",
        "authors": "Yuntao Wang, Yu Cheng, Junhao Geng",
        "abstract": "This paper develops an AR-based assembly guidance system, OpenIAI-SNIO, for small-scale, high-density industrial components (SHIC), which addresses the challenge of existing AR technology's inability to achieve complete, accurate, and stable visual cognition and assembly operation guidance for SHIC. OpenIAI-SNIO combines artificial intelligence methods such as computer vision and deep learning with rule-based reasoning and augmented reality to achieve adaptive, whole process, and precise guidance of SHIC assembly in situations where visual information is insufficient. The application case shows that OpenIAI-SNIO can effectively improve the efficiency and quality of SHIC assembly, and reduce the workload of operators, realizing the systematic and practical application of AR technology in SHIC assembly.",
        "location": "Guangzhou",
        "day": "August 29th",
        "hour": "17:15",
        "session": "DEMOS1.1"
    },
    {
        "id": "DM27",
        "title": "Tsururu: A Python-based Time Series Forecasting Strategies Library",
        "authors": "Alina Kostromina, Kseniia Kuvshinova, Aleksandr Yugay, Andrey Savchenko, Dmitry Simakov",
        "abstract": "While current time series research focuses on developing new models, crucial questions of selecting an optimal approach for training such models are underexplored. Tsururu, a Python library introduced in this paper, bridges SoTA research and industry by enabling flexible combinations of global and multivariate approaches and multi-step-ahead forecasting strategies. It also enables seamless integration with various forecasting models. Available at https:\/\/github.com\/sb-ai-lab\/tsururu.",
        "location": "Guangzhou",
        "day": "August 29th",
        "hour": "17:15",
        "session": "DEMOS1.2"
    },
    {
        "id": "DM28",
        "title": "Aerial Coverage Path Planning in Nuclear Emergencies",
        "authors": "Johann Blake, Matthias Schubert",
        "abstract": "We formulate a Coverage Path Planning (CPP) problem for a helicopter or a UAV tasked with mapping ground-level radiation while avoiding radiation that is too strong. We introduce a simulation environment that incorporates digital elevation models, altitude-dependent measurement footprints and realistic flight constraints, as well as state-of-the-art radiation scenario simulations, such as nuclear explosions, provided by the German Federal Office for Radiation Protection. We highlight the complexity of radiological survey missions and demonstrate the necessity for new CPP approaches that address these unique challenges. The code to our simulation environment can be found under https:\/\/github.com\/JohannBlake\/Aerial-Coverage-Path-Planning-in-Nuclear-Emergencies.",
        "location": "Montreal",
        "day": "August 20th",
        "hour": "11:30",
        "session": "DEMOS 1",
        "poster_positions": "From board n105 to board n108"
    },
    {
        "id": "DM41",
        "title": "TRIKOP: Exploring Visual Prompting Paradigms for Multi-Grade Knee Osteoarthritis Classification on MRI Images",
        "authors": "Hieu Phan, Hung Pham, Dat Nguyen, Khoa Le, Tuan Nguyen, Triet Tran, Tho Quan",
        "abstract": "Knee osteoarthritis (KOA) is a degenerative joint disease that significantly impacts quality of life. While transfer learning shows promise in medical imaging, its application to KOA diagnosis remains challenging due to medical data's unique characteristics. To address this, we propose TRIKOP, a framework leveraging Visual Prompting for KOA diagnosis on MRI. Our approach explores three prompt-generating strategies that extract clinically relevant information from input images. Each prompt type is encoded using a tailored method to integrate effectively into the Vision Transformer for optimal representation. Among them, the contrastive embedding prompting strategy achieves 63.04% accuracy on the OAI dataset, surpassing prior studies. Moreover, TRIKOP produces attention maps highlighting diagnostically significant regions, improving model interpretability. This work highlights TRIKOP’s potential to improve AI-driven KOA diagnosis and clinical support.",
        "location": "Montreal",
        "day": "August 20th",
        "hour": "11:30",
        "session": "DEMOS 3",
        "poster_positions": "From board n113 to board n116"
    },
    {
        "id": "DM43",
        "title": "PCToolkit: A Unified Plug-and-Play Prompt Compression Toolkit of Large Language Models",
        "authors": "Zheng Zhang, Jinyi Li, Yihuai Lan, Xiang Wang, Hao Wang",
        "abstract": "Prompt engineering enables Large Language Models (LLMs) to perform a variety of tasks. However, lengthy prompts significantly increase computational complexity and economic costs. To address this issue, prompt compression reduces prompt length while maintaining LLM response quality. To support rapid implementation and standardization, we present the Prompt Compression Toolkit (PCToolkit), a unified plug-and-play framework for LLM prompt compression. PCToolkit integrates state-of-the-art compression algorithms, benchmark datasets, and evaluation metrics, enabling systematic performance analysis. Its modular architecture simplifies customization, offering portable interfaces for seamless incorporation of new datasets, metrics, and compression methods. Our code is available at https:\/\/github.com\/3DAgentWorld\/Toolkit-for-Prompt-Compression. Our demo is at https:\/\/huggingface.co\/spaces\/CjangCjengh\/Prompt-Compression-Toolbox.",
        "location": "Guangzhou",
        "day": "August 29th",
        "hour": "17:15",
        "session": "DEMOS1.2"
    },
    {
        "id": "DM46",
        "title": "Fairness-Aware Interactive Target Variable Definition",
        "authors": "Dalia Gala, Milo Phillips-Brown, Naman Goel, Carina Prunkl, Laura Alvarez Jubete, Medb Corcoran, Ray Eitel-Porter",
        "abstract": "Machine learning requires defining one's target variable for predictions or decisions, a process that can have profound implications on fairness, since biases are often encoded in target variable definition itself, before any data collection or training. The downstream impacts of target variable definitions must be taken into account in order to responsibly develop, deploy, and use the algorithmic systems. We propose FairTargetSim (FTS), an interactive and simulations-based approach for this. We demonstrate FTS using the example of algorithmic hiring, grounded in real-world data and user-defined target variables. FTS is open-source; it can be used by algorithm developers, non-technical stakeholders, researchers, and educators in a number of ways. FTS is available at: http:\/\/tinyurl.com\/ftsinterface. The  video accompanying this paper is here: http:\/\/tinyurl.com\/ijcaifts.",
        "location": "Montreal",
        "day": "August 20th",
        "hour": "11:30",
        "session": "DEMOS 5",
        "poster_positions": "From board n120 to board n123"
    },
    {
        "id": "DM52",
        "title": "v7: Toward a Faster Vietnamese Typing Toolkit",
        "authors": "Duc Nguyen, Khang Võ, Nghia Nguyen",
        "abstract": "This paper introduces v7, an AI-powered Vietnamese typing method that replaces traditional modifier-based input methods with an intelligent predictive system. By optimizing both initial consonants, rhymes, and tones — the core components of Vietnamese phonetics and orthography, v7 drastically reduces keystrokes while maintaining high prediction accuracy. Through real-time responsiveness, auto-spacing, and linguistic pattern recognition, v7 enhances both typing speed and efficiency. As the first predictive-based approach tailored to the Vietnamese language, v7 offers a more natural, intuitive, and effortless typing experience, redefining digital text entry for Vietnamese users.",
        "location": "",
        "day": "",
        "hour": "",
        "session": ""
    },
    {
        "id": "DM54",
        "title": "SkyRover: A Modular Simulator for Cross-Domain Pathfinding",
        "authors": "Wenhui Ma, Wenhao Li, Bo Jin, Changhong Lu, Xiangfeng Wang",
        "abstract": "Unmanned Aerial Vehicles (UAVs) and Automated Guided Vehicles (AGVs) increasingly collaborate in logistics, surveillance, inspection tasks and etc. \r\nHowever, existing simulators often focus on a single domain, limiting cross-domain study. \r\nThis paper presents the SkyRover, a modular simulator for UAV-AGV multi-agent pathfinding (MAPF). \r\nSkyRover supports realistic agent dynamics, configurable 3D environments, and convenient APIs for external solvers and learning methods. \r\nBy unifying ground and aerial operations, it facilitates cross-domain algorithm design, testing, and benchmarking. \r\nExperiments highlight SkyRover’s capacity for efficient pathfinding and high-fidelity simulations in UAV-AGV coordination.\r\nWe believe the SkyRover fills a key gap in MAPF research. \r\nProject is available at https:\/\/sites.google.com\/view\/mapf3d\/home.",
        "location": "Guangzhou",
        "day": "August 29th",
        "hour": "17:15",
        "session": "DEMOS1.3"
    },
    {
        "id": "DM57",
        "title": "What If LLMs Can Smell: A Prototype",
        "authors": "Xueyi Zhou, Qi Lu, Dong-Kyu Chae",
        "abstract": "The olfaction is hardly mentioned in the studies of multi-modal Large Language Models (LLMs). This demo presents a prototypical framework to embody prevalent LLMs with smelling ability using a plug-and-play olfactory signal processing service. To this end, we collect a dataset on Korean beers by self-developed electronic noses (e-noses) and an open-source dataset. An olfaction-related question-answering corpus is also generated to fine-tune LLMs. A gas classification model is applied to identify the smelling liquor upon the e-nose data. We then adopt and fine-tune LLMs on the generated datasets. The results show that LLMs under this framework can interact with the environment by its `nose' and provide olfaction-related answers augmented by our dataset. To the best of our knowledge, this is the first work on embodying LLMs with artificial olfaction. We additionally deployed the gas classification model and the trained LLM in a simple web-based system to show the feasibility of our prototype. Our demo video can be found at: https:\/\/bit.ly\/4j8x6ZY.",
        "location": "Guangzhou",
        "day": "August 30th",
        "hour": "16:30",
        "session": "DEMOS2.1"
    },
    {
        "id": "DM58",
        "title": "SAFE: Structured Argumentation for Fact-checking with Explanations",
        "authors": "Xiaoou Wang, Elena Cabrio, Serena Villata",
        "abstract": "Explainable fact-checking plays a vital role in the fight against disinformation in today’s digital landscape. With the increasing volume of unverified content online, providing justifications for fact-checking has become essential to help users make informed decisions. While recent studies provide user-friendly explanations through abstractive or extractive summarization, they often assume the availability of human-written fact-checking articles, which is not always the case. This demo introduces SAFE, an argument-based framework designed to enhance both fact-checking and its justification. Specifically, SAFE offers three key features: i) producing argument-structured summaries of human-written fact-checking articles, ii) in the absence of human-written articles, generating structured summaries based on evidence retrieved from a corpus through a jointly trained summarization and evidence retrieval system, and iii) assessing the truthfulness of a claim by analyzing the structured summary.",
        "location": "Montreal",
        "day": "August 20th",
        "hour": "11:30",
        "session": "DEMOS 2",
        "poster_positions": "From board n109 to board n112"
    },
    {
        "id": "DM59",
        "title": "Aletheia: Detect, Discuss, and Stay Informed on Fake News",
        "authors": "Dorsaf Sallami, Esma Aïmeur",
        "abstract": "In today's digital era, the rapid spread of fake news undermines both social unity and democratic institutions, demanding effective countermeasures.  Current browser extensions to counter fake news have significant limitations, such as opaque models, dependency on traditional Machine Learning (ML) techniques, lack of explanatory features, and limited focus on detection without user engagement support. This paper introduces Aletheia, a novel browser extension that addresses these shortcomings by leveraging Retrieval Augmented Generation (RAG) and Large Language Models (LLMs) to enhance fake news detection and provide evidence-based explanations. Additionally, Aletheia incorporates two key components: a Discussion Hub, enabling users to discuss instances of fake news, and a Stay Informed feature, which displays the latest fact-checks. Aletheia surpasses state-of-the-art methods according to experimental results.",
        "location": "Montreal",
        "day": "August 20th",
        "hour": "11:30",
        "session": "DEMOS 2",
        "poster_positions": "From board n109 to board n112"
    },
    {
        "id": "DM63",
        "title": "MedDiT: A Knowledge-Controlled Diffusion Transformer Framework for Dynamic Medical Image Generation in Virtual Simulated Patient",
        "authors": "Yanzeng Li, Cheng Zeng, Jinchao Zhang, Jie Zhou, Lei Zou",
        "abstract": "Medical education relies heavily on Simulated Patients (SPs) to provide a safe environment for students to practice clinical skills, including medical image analysis. However, the high cost of recruiting qualified SPs and the lack of diverse medical imaging datasets have presented significant challenges. To address these issues, this paper introduces MedDiT, a novel knowledge-controlled conversational framework that can dynamically generate plausible medical images aligned with simulated patient symptoms, enabling diverse diagnostic skill training. Specifically, MedDiT integrates various patient Knowledge Graphs (KGs), which describe the attributes and symptoms of patients, to dynamically prompt Large Language Models' (LLMs) behavior and control the patient characteristics, mitigating hallucination during medical conversation. Additionally, a well-tuned Diffusion Transformer (DiT) model is incorporated to generate medical images according to the specified patient attributes in the KG. In this paper, we present the capabilities of MedDiT through a practical demonstration, showcasing its ability to act in diverse simulated patient cases and generate the corresponding medical images. This can provide an abundant and interactive learning experience for students, advancing medical education by offering an immersive simulation platform for future healthcare professionals. The work sheds light on the feasibility of incorporating advanced technologies like LLM, KG, and DiT in education applications, highlighting their potential to address the challenges faced in simulated patient-based medical education.",
        "location": "Guangzhou",
        "day": "August 30th",
        "hour": "16:30",
        "session": "DEMOS2.1"
    },
    {
        "id": "DM68",
        "title": "PyTorch-Lifestream: Learning Embeddings on Discrete Event Sequences",
        "authors": "Artem Sakhno, Ivan Kireev, Dmitrii Babaev, Maxim Savchenko, Gleb Gusev, Andrey Savchenko",
        "abstract": "The domain of event sequences is widely applied in various industrial tasks in banking, healthcare, etc., where temporal tabular data processing is required. This paper introduces PyTorch-Lifestream, the first open-source library specially designed to handle event sequences. It supports scenarios with multimodal data and offers a variety of techniques for learning embeddings of event sequences and end-to-end model training. Furthermore, PyTorch-Lifestream efficiently implements state-of-the-art methods for event sequence analysis and adapts approaches from similar domains, thus enhancing the versatility and performance of sequence-based models for a wide range of applications, including financial risk scoring, campaigning, user ID matching, churn prediction, fraud detection, medical diagnostics, and recommender systems.",
        "location": "Guangzhou",
        "day": "August 30th",
        "hour": "16:30",
        "session": "DEMOS2.1"
    },
    {
        "id": "DM69",
        "title": "SandboxSocial: A Sandbox for Social Media Using Multimodal AI Agents",
        "authors": "Maximilian Puelma Touzel, Sneheel Sarangi, Gayatri Krishnakumar, Busra Tugce Gurbuz, Austin Welch, Zachary Yang, Andreea Musulan, Hao Yu, Ethan Kosak-Hine, Tom Gibbs, Camille Thibault, Reihaneh Rabbany, Jean-François Godbout, Dan Zhao, Kellin Pelrine",
        "abstract": "The online information ecosystem enables influence campaigns of unprecedented scale and impact. We urgently need empirically grounded approaches to counter the growing threat of malicious campaigns, now amplified by generative AI. But, developing defenses in real-world settings is impractical. Social system simulations with agents modelled using Large Language Models (LLMs) are a promising alternative approach and a growing area of research. However, existing simulators lack features needed to capture the complex information-sharing dynamics of platform-based social networks. To bridge this gap, we present SandboxSocial, a new simulator that includes several key innovations, mainly: (1) a virtual social media platform (modelled as Mastodon and mirrored in an actual Mastodon server) that enables a realistic setting in which agents interact; (2) an adapter that uses real-world user data to create more grounded agents and social media content; and (3) multi-modal capabilities that enable our agents to interact using both text and images---just as humans do on social media. We make the simulator more useful to researchers by providing measurement and analysis tools that track simulation dynamics and compute evaluation metrics to compare experimental results.",
        "location": "Montreal",
        "day": "August 20th",
        "hour": "11:30",
        "session": "DEMOS 1",
        "poster_positions": "From board n105 to board n108"
    },
    {
        "id": "DM72",
        "title": "Veracity: An Open-Source AI Fact-Checking System",
        "authors": "Taylor Lynn Curtis, Maximilian Puelma Touzel, William Garneau, Manon Gruaz, Mike Pinder, Li Wei Wang, Sukanya Krishna, Luda Cohen, Jean-François Godbout, Reihaneh Rabbany, Kellin Pelrine",
        "abstract": "The proliferation of misinformation poses a significant threat to society, exacerbated by the capabilities of generative AI. \r\nThis demo paper introduces Veracity, an open-source AI system designed to empower individuals to combat misinformation through transparent and accessible fact-checking.  Veracity leverages the synergy between Large Language Models (LLMs) and web retrieval agents to analyze user-submitted claims and provide grounded veracity assessments with intuitive explanations.  Key features include multilingual support, numerical scoring of claim veracity, and an interactive interface inspired by familiar messaging applications.  This paper will showcase Veracity's ability to not only detect misinformation but also explain its reasoning, fostering media literacy and promoting a more informed society.",
        "location": "Montreal",
        "day": "August 20th",
        "hour": "11:30",
        "session": "DEMOS 6",
        "poster_positions": "From board n124 to board n126"
    },
    {
        "id": "DM76",
        "title": "VRD-IU: Lessons from Visually Rich Document Intelligence and Understanding",
        "authors": "Yihao Ding, Soyeon Caren Han, Yan Li, Josiah Poon",
        "abstract": "Visually Rich Document Understanding (VRDU) has emerged as a critical field in document intelligence, enabling automated extraction of key information from complex documents across domains such as medical, financial, and educational applications. However, form-like documents pose unique challenges due to their complex layouts, multi-stakeholder involvement, and high structural variability. Addressing these issues, the VRD-IU Competition was introduced, focusing on extracting and localizing key information from multi-format forms within the Form-NLU dataset, which includes digital, printed, and handwritten documents.\r\nThis paper presents insights from the competition, which featured two tracks: Track A, emphasizing entity-based key information retrieval, and Track B, targeting end-to-end key information localization from raw document images. With over 20 participating teams, the competition showcased various state-of-the-art methodologies, including hierarchical decomposition, transformer-based retrieval, multimodal feature fusion, and advanced object detection techniques. The top-performing models set new benchmarks in VRDU, providing valuable insights into document intelligence.",
        "location": "Montreal",
        "day": "August 20th",
        "hour": "11:30",
        "session": "DEMOS 3",
        "poster_positions": "From board n113 to board n116"
    },
    {
        "id": "DM77",
        "title": "Taking STEPS Forward: Enhancing Online Peer-Counseling with Schema Therapy via Socratic Questioning",
        "authors": "Beng Heng Ang, Sujatha Das Gollapalli, See-Kiong Ng",
        "abstract": "Peer-counseling is essential in online mental health communities to provide relatable support to those seeking help, but the peer-counselors often lack professional training in therapeutic counseling to produce the desired cognitive changes. In this paper, we present STEPS, an AI-powered assistive dialog tool for peer-counseling. Unlike other existing tools, STEPS assists peer-counselors in facilitating cognitive change in online counseling settings. Towards this goal, we emulate two key phases in a Schema Therapy-based in-person counseling session–(1) Schema Assessment to uncover the deep-seated irrational beliefs underlying an individual’s mental health problems, and (2) Cognitive Change to reframe these beliefs into healthier alternatives. In both phases, we employ Socratic questioning techniques to effectively elicit critical introspection and guide cognitive change. We describe STEPS and present expert evaluation studies on its counseling conversations on real-world mental health forum posts. Our results indicate that STEPS significantly outperforms competitive baselines on all key metrics related to schema assessment, cognitive change strategies, and critical thinking, achieving an impressive average rating of 5 out of 6, highlighting its strong potential as a transformative tool for online peer-counseling.",
        "location": "Guangzhou",
        "day": "August 30th",
        "hour": "16:30",
        "session": "DEMOS2.2"
    },
    {
        "id": "DM79",
        "title": "MoleculeMiner: Extracting and Linking Molecule Figures with Tabular Metadata",
        "authors": "Abhisek Dey, Nathaniel H. Stanley",
        "abstract": "Despite an ongoing shift in automated chemical literature search methods, many are fairly limited in ability to find very specific relevant information about a drawn molecule and its associated property data. We aim to tackle the challenge of converting drawn molecules to a machine readable representation and co-reference any associated molecule data. MoleculeMiner is a system where a user can feed in their own patent or paper to obtain each drawn molecule along with any specific metadata (chemical name, chemical reactivity, yield, purity etc.) provided anywhere in the PDF in a tabular format, using an interactive user-friendly environment. We also present MolScribeV2, a molecular image parser which improved upon the original MolScribe by introducing pixel-based self attention positional embedding technique. Along with other changes, MolScribeV2 is robust to varied styles of compound drawings commonly found in patents and papers--scanned or born digital. Our extraction and user interactive system can be found at https:\/\/github.com\/insitro\/MoleculeMiner.",
        "location": "Montreal",
        "day": "August 20th",
        "hour": "11:30",
        "session": "DEMOS 3",
        "poster_positions": "From board n113 to board n116"
    },
    {
        "id": "DM80",
        "title": "MatchXplain: Analyzing Preferences, Explaining Outcomes, and Simplifying Decisions",
        "authors": "Hadi Hosseini, Yubo Jing, Ronak Singh",
        "abstract": "Matching markets, where agents are assigned to one another based on preferences and constraints, are fundamental in various AI-driven applications such as school choice, content matching, and recommender systems. A key challenge in these markets is understanding preference data, as the interpretability of algorithmic solutions hinges on accurately capturing and explaining preferences. We introduce MatchXplain, a platform that integrates preference explanation with a robust matching engine. MatchXplain offers a layered approach for explaining preferences, computing diverse matching solutions, and providing interactive visualizations to enhance user understanding. By bridging algorithmic decision-making with explainability, MatchXplain improves transparency and trust in algorithmic matching markets.",
        "location": "Montreal",
        "day": "August 20th",
        "hour": "11:30",
        "session": "DEMOS 5",
        "poster_positions": "From board n120 to board n123"
    },
    {
        "id": "DM84",
        "title": "TimelyMed: AI-Driven Clinical Course Attribution and Temporal Mapping for Psychiatric Medical Records",
        "authors": "Chien-Hung Chen, Chi-Shin Wu, Chu-Hsien Su, Hsin-Hsi Chen",
        "abstract": "Timely understanding of a patient’s clinical course is crucial for effective treatment. Extracting course-related information, such as temporal and medical events, from unstructured medical records is both challenging and time-consuming, especially when relying on manual identification by physicians. We introduce TimelyMed, a system powered by a locally deployed large language model (LLM) that ensures data security while efficiently organizing key psychiatric events and their corresponding temporal information. Additionally, our system is attributed, allowing clinicians to not only categorize events but also trace them back to their original textual descriptions, ensuring transparency and interpretability in clinical decision-making. By organizing temporal and medical event information into timelines, our system enables physicians to quickly grasp a patient’s medical history while effectively reducing their cognitive burden.",
        "location": "Montreal",
        "day": "August 20th",
        "hour": "11:30",
        "session": "DEMOS 2",
        "poster_positions": "From board n109 to board n112"
    },
    {
        "id": "DM85",
        "title": "Automated Decision-Making on Networks with LLMs through Knowledge-Guided Evolution",
        "authors": "Xiaohan Zheng, Lanning Wei, Yong Li, Quanming Yao",
        "abstract": "Effective decision-making on networks often relies on learning from graph-structured data, where Graph Neural Networks (GNNs) play a central role, but they take efforts to configure and tune. In this demo, we propose LLMNet, showing how to design GNN automated through Large Language Models. Our system develops a set of agents that construct graph-related knowlege bases and then leverages Retrieval-Augmented Generation (RAG) to support automated configuration and refinement of GNN models through a knowledge-guided evolution process. These agents, equipped with specialized knowledge bases, extract insights into tasks and graph structures by interacting with the knowledge bases.Empirical results show LLMNet excels in twelve datasets across three graph learning tasks, validating its effectiveness of GNN model designing.",
        "location": "Guangzhou",
        "day": "August 29th",
        "hour": "17:15",
        "session": "DEMOS1.1"
    },
    {
        "id": "DM87",
        "title": "A Smart Multimodal Healthcare Copilot with Powerful LLM Reasoning",
        "authors": "Xuejiao Zhao, Siyan Liu, Su-Yin Yang, Chunyan Miao",
        "abstract": "Misdiagnosis causes significant harm to healthcare systems worldwide, leading to increased costs and patient risks. MedRAG is a smart multimodal healthcare copilot equipped with powerful large language model (LLM) reasoning, designed to enhance medical decision-making. It supports multiple input modalities, including non-intrusive voice monitoring, general medical queries, and electronic health records. MedRAG provides recommendations on diagnosis, treatment, medication, and follow-up questioning. Leveraging retrieval-augmented generation enhanced by knowledge graph-elicited reasoning, MedRAG retrieves and integrates critical diagnostic insights, reducing the risk of misdiagnosis. It has been evaluated on both public and private datasets, outperforming existing models and offering more specific and accurate healthcare assistance. A demonstration video of MedRAG is available at: https:\/\/www.youtube.com\/watch?v=PNIBDMYRfDM. The source code is available at: https:\/\/github.com\/SNOWTEAM2023\/MedRAG.",
        "location": "Guangzhou",
        "day": "August 29th",
        "hour": "17:15",
        "session": "DEMOS1.3"
    },
    {
        "id": "DM88",
        "title": "Conversational Exploration of Literature Landscape with LitChat",
        "authors": "Mingyu Huang, Shasha Zhou, Yuxuan Chen, Ke Li",
        "abstract": "We are living in an era of \"big literature\", where the volume of digital scientific publications is growing exponentially. While offering new opportunities, this also poses challenges for understanding literature landscapes, as traditional manual reviewing is no longer feasible. Recent large language models (LLMs) have shown strong capabilities for literature comprehension, yet they are incapable of offering \"comprehensive, objective, open and transparent\" views desired by systematic reviews due to their limited context windows and trust issues like hallucinations. Here we present LitChat, an end-to-end, interactive and conversational literature agent that augments LLM agents with data-driven discovery tools to facilitate literature exploration. LitChat automatically interprets user queries, retrieves relevant sources, constructs knowledge graphs, and employs diverse data-mining techniques to generate evidence-based insights addressing user needs. We illustrate the effectiveness of LitChat via a case study on AI4Health, highlighting its capacity to quickly navigate the users through large-scale literature landscape with data-based evidence that is otherwise infeasible with traditional means.",
        "location": "Guangzhou",
        "day": "August 30th",
        "hour": "16:30",
        "session": "DEMOS2.2"
    },
    {
        "id": "DM90",
        "title": "Using Planning for Automated Testing of Video Games",
        "authors": "Tomáš Balyo, Roman Barták, Lukáš Chrpa, Michal Červenka, Filip Dvořák, Stephan Gocht, Lukáš Lipčák, Viktor Macek, Dominik Roháček, Josef Ryzí, Martin Suda, Dominik Šafránek, Slavomír Švancar, G. Michael Youngblood",
        "abstract": "In this demonstration, we present a system that automates regression testing for video games using automated planning techniques. Traditional test scripts are a common method for testing both video games and software in general. While effective, they require manual creation and frequent updates throughout development, making the process labor-intensive. Our system eliminates this burden by automatically generating and maintaining test scripts. The test engineer only needs to define the game’s rules using the Planning Domain Definition Language (PDDL) and specify initial states and goals for individual test cases. This significantly reduces human effort while ensuring test scripts remain up to date. Additionally, our system integrates with game engine editors—supporting both Unity and Unreal to execute and evaluate test cases directly within the game. It collects detailed logs, telemetry data, and video recordings, allowing users to review test results efficiently.",
        "location": "Montreal",
        "day": "August 20th",
        "hour": "11:30",
        "session": "DEMOS 3",
        "poster_positions": "From board n113 to board n116"
    },
    {
        "id": "DM93",
        "title": "NatSTV: Towards Verification of Natural Strategic Ability",
        "authors": "Mateusz Kamiński, Damian Kurpiewski, Wojciech Jamroga",
        "abstract": "We present NatSTV, a tool for approximate verification of natural strategic ability in multi-agent systems. The tool builds on our model checker STV (STrategic Verifier), and implements heuristic synthesis of natural strategies for asynchronous agents with imperfect information and recall. All of that is available through a web interface, with no need to install or configure the software by the user.",
        "location": "Montreal",
        "day": "August 20th",
        "hour": "11:30",
        "session": "DEMOS 1",
        "poster_positions": "From board n105 to board n108"
    },
    {
        "id": "DM97",
        "title": "HealthLens: A Natural Language Querying System for Interactive Visualization of Electronic Health Records",
        "authors": "Haodi Zhang, Siqi Ning, Qiyong Zheng, Yuanfeng Song, Liang-Jie Zhang",
        "abstract": "As an essential part of modern healthcare systems, extracting valuable insights from electronic medical records (EMRs) remains challenging due to the complexity of structured and unstructured data. Data visualization is essential for transforming complex data into comprehensible visuals that enable professionals to identify patterns and trends. This process involves selecting data attributes, transforming the data, choosing appropriate visual encoding methods, and rendering graphical representations using declarative visualization languages (DVLs). However, achieving proficiency in DVLs requires a deep understanding of domain-specific data and expertise in these languages, which poses a significant barrier for beginners and non-technical users. To address these challenges, we present HealthLens, the first user-friendly visualization tool in the EMR domain that eliminates the need for prior knowledge of DVLs. Built on the MedCodeT5 model developed by us and leveraging a large language model with a bilevel optimization approach, HealthLens enables the generation of EMR visualizations from natural language queries. This demonstrates the feasibility of creating sophisticated visualizations with minimal technical expertise, advancing accessibility in the EMR field.",
        "location": "Guangzhou",
        "day": "August 30th",
        "hour": "16:30",
        "session": "DEMOS2.2"
    },
    {
        "id": "DM99",
        "title": "Machine Learning Driven Optimization of Fe-Based TMCs for Photodynamic Therapy",
        "authors": "Vladimir Manuilov, Antonio Francés-Monerris, Abdelazim M.A. Abdelgawwad, Daniel Escudero, Ilya Makarov",
        "abstract": "Noble metal-based photoactive complexes have applications in photodynamic therapy (PDT), but their toxicity and high cost drive interest in sustainable and cheaper alternatives like iron-based compounds. In this paper, quantum chemistry and classical molecular dynamics were employed to characterize the photophysical properties and non-covalent interactions with DNA of two Fe(III) complexes. We explained the absorption of IR wavelength by bright ligand-to-metal transitions and showed that the complexes exhibit persistent, albeit modest, interaction with DNA. Building on these traditional simulation methods, we propose a conceptual ML-driven optimization module designed to refine the structure of iron complexes and enhance their photophysical features. While the framework is not yet implemented, we demonstrate that key properties relevant for PDT can be computationally evaluated, providing a foundation for future iterative optimization. The ML module integrates 3D molecular structures, simulation results, and quantum chemical insights to suggest modifications aimed at shifting the absorption spectrum more favorably into the visible range, improving their suitability for phototherapies.",
        "location": "Montreal",
        "day": "August 20th",
        "hour": "11:30",
        "session": "DEMOS 4",
        "poster_positions": "From board n117 to board n119"
    },
    {
        "id": "DM104",
        "title": "DAVE: A Framework for Assisted Analysis of Document Collections in Knowledge-Intensive Domains",
        "authors": "Ruben Agazzi, Renzo Alva Principe, Riccardo Pozzi, Marco Ripamonti, Matteo Palmonari",
        "abstract": "DAVE is a framework for assisting the analysis of documents in knowledge-intensive domains, based on an entity-centric approach supported by annotations of named entities in the documents. DAVE supports search & filtering, document exploration, question answering, and knowledge refinement. It is released as an open-source project that the community can further develop. DAVE’s distinguishing features are: the integration of a chatbot interface based on recent RAG solutions into well-established entity-powered faceted search, the fusion of search and filtering features provided by entity-level annotations with the capability to ask questions on annotated documents; human-in-the-loop functions to consolidate knowledge while exploring information, allowing users to improve annotations from NLP algorithms.",
        "location": "Montreal",
        "day": "August 20th",
        "hour": "11:30",
        "session": "DEMOS 6",
        "poster_positions": "From board n124 to board n126"
    },
    {
        "id": "DM106",
        "title": "A Multimodal AI Dialogue System for Unified Document, Visual, and Audio Interaction",
        "authors": "Yujun Feng, Jingyi Huang, Yang Zhang",
        "abstract": "This paper presents a multimodal intelligent dialogue system that seamlessly integrates document analysis, visual media processing, and audio interaction within a unified web interface. The system ensures secure user identity verification through persistent conversational management, leveraging textual document analysis, dynamic context integration, and cross-media interactions via video, image, and real-time speech processing. Our approach introduces three key innovations: (1) context-aware document analysis through text extraction, (2) a multimodal input pipeline supporting images, videos, and audio, and (3) persistent chat history management for maintaining conversational continuity. The system facilitates seamless transitions between audio and text, enabling natural interactions by processing audio input and converting text responses into speech. Additionally, the platform provides an intuitive interface for document uploads, camera capture, and audio recording, while ensuring conversation context is preserved across sessions. This implementation demonstrates the practical integration of multimodal input in an interactive artificial intelligence (AI) system, showcasing its potential for enhanced user engagement and interaction.",
        "location": "Montreal",
        "day": "August 20th",
        "hour": "11:30",
        "session": "DEMOS 5",
        "poster_positions": "From board n120 to board n123"
    }
]