[
    {
        "id": "9258",
        "title": "Multimodal Retina Image Analysis Survey: Datasets, Tasks and Methods",
        "authors": "Hongwei Sheng, Heming Du, Xin Shen, Sen Wang, Xin Yu",
        "abstract": "Retina images provide a noninvasive view of the central nervous system and microvasculature, making them essential for clinical applications. \r\nChanges in the retina often indicate both ophthalmic and systemic diseases, aiding in diagnosis and early intervention.\r\nWhile deep learning algorithms have advanced retina image analysis, a comprehensive review of related datasets, tasks, and benchmarking is still lacking. \r\nIn this survey, we systematically categorize existing retina image datasets based on their available data modalities, and review the tasks these datasets support in multimodal retina image analysis. \r\nWe also explain key evaluation metrics used in various retina image analysis benchmarks.\r\nBy thoroughly examining current datasets and methods, we highlight the challenges and limitations in existing benchmarks and discuss potential research topics in the field.\r\nWe hope this work will guide future retina analysis methods and promote the shared use of existing data across different tasks.",
        "location": "Guangzhou",
        "day": "August 31st",
        "hour": "09:40",
        "session": "Multidisciplinary Applications - MTA: Biomedical"
    },
    {
        "id": "734",
        "title": "Toward Robust Non-Transferable Learning: A Survey and Benchmark",
        "authors": "Ziming Hong, Yongli Xiang, Tongliang Liu",
        "abstract": "Over the past decades, researchers have primarily focused on improving the generalization abilities of models, with limited attention given to regulating such generalization. However, the ability of models to generalize to unintended data (e.g., harmful or unauthorized data) can be exploited by malicious adversaries in unforeseen ways, potentially resulting in violations of model ethics. Non-transferable learning (NTL), a task aimed at reshaping the generalization abilities of deep learning models, was proposed to address these challenges. While numerous methods have been proposed in this field, a comprehensive review of existing progress and a thorough analysis of current limitations remain lacking. In this paper, we bridge this gap by presenting the first comprehensive survey on NTL and introducing NTLBench, the first benchmark to evaluate NTL performance and robustness within a unified framework. Specifically, we first introduce the task settings, general framework, and criteria of NTL, followed by a summary of NTL approaches. Furthermore, we emphasize the often-overlooked issue of robustness against various attacks that can destroy the non-transferable mechanism established by NTL. Experiments conducted via NTLBench verify the limitations of existing NTL methods in robustness. Finally, we discuss the practical applications of NTL, along with its future directions and associated challenges.",
        "location": "Guangzhou",
        "day": "August 30th",
        "hour": "11:00",
        "session": "ML: Transfer Learning"
    },
    {
        "id": "8770",
        "title": "Safety of Embodied Navigation: A Survey",
        "authors": "Zixia Wang, Jia Hu, Ronghui Mu",
        "abstract": "As large language models (LLMs) continue to advance and gain influence, the development of embodied AI has accelerated, drawing significant attention, particularly in navigation scenarios. Embodied navigation requires an agent to perceive, interact with, and adapt to its environment while moving toward a specified target in unfamiliar settings. However, the integration of embodied navigation into critical applications raises substantial safety concerns. Given their deployment in dynamic, real-world environments, ensuring the safety of such systems is critical. This survey provides a comprehensive analysis of safety in embodied navigation from multiple perspectives, encompassing attack strategies, defense mechanisms, and evaluation methodologies. Beyond conducting a comprehensive examination of existing safety challenges, mitigation technologies, and various datasets and metrics that assess effectiveness and robustness, we explore unresolved issues and future research directions in embodied navigation safety. These include potential attack methods, mitigation strategies, more reliable evaluation techniques, and the implementation of verification frameworks. By addressing these critical gaps, this survey aims to provide valuable insights that can guide future research toward the development of safer and more reliable embodied navigation systems. Furthermore, the findings of this study have broader implications for enhancing societal safety and increasing industrial efficiency.",
        "location": "Guangzhou",
        "day": "August 29th",
        "hour": "14:30",
        "session": "Robotics"
    },
    {
        "id": "8296",
        "title": "Harnessing Vision Models for Time Series Analysis: A Survey",
        "authors": "Jingchao Ni, Ziming Zhao, ChengAo Shen, Hanghang Tong, Dongjin Song, Wei Cheng, Dongsheng Luo, Haifeng Chen",
        "abstract": "Time series analysis has evolved from traditional autoregressive models to deep learning, Transformers, and Large Language Models (LLMs). While vision models have also been explored along the way, their contributions are less recognized due to the predominance of sequence modeling. However, challenges such as the mismatch between continuous time series and LLMs’ discrete token space, and the difficulty in capturing multivariate correlations, have led to growing interest in Large Vision Models (LVMs) and Vision-Language Models (VLMs). This survey highlights the advantages of vision models over LLMs in time series analysis, offering a comprehensive dual-view taxonomy that answers key research questions like how to encode time series as images and how to model imaged time series. Additionally, we address pre- and post-processing challenges in this framework and outline future directions for advancing the field.",
        "location": "Montreal",
        "day": "August 20th",
        "hour": "14:00",
        "session": "ML: time series, sequences and signals",
        "poster_positions": "From board n6 to board n11"
    },
    {
        "id": "8508",
        "title": "Artificial Intelligence in Spectroscopy: Advancing Chemistry from Prediction To Generation and Beyond",
        "authors": "Kehan Guo, Yili Shen, Gisela Abigail Gonzalez-Montiel, Yue Huang, Yujun Zhou, Mihir Surve, Zhichun Guo, Payel Das, Nitesh V. Chawla, Olaf Wiest, Xiangliang Zhang",
        "abstract": "The rapid advent of machine learning (ML) and artificial intelligence (AI) has catalyzed major transformations in chemistry, yet the application of these methods to spectroscopic and spectrometric data—termed Spectroscopy Machine Learning (SpectraML)—remains relatively underexplored. Modern spectroscopic techniques (MS, NMR, IR, Raman, UV-Vis) generate an ever-growing volume of high-dimensional data, creating a pressing need for automated and intelligent analysis beyond traditional expert-based workflows. In this survey, we provide a unified review of SpectraML, systematically examining state-of-the-art approaches for both forward tasks (molecule-to-spectrum prediction) and inverse tasks (spectrum-to-molecule inference). We trace the historical evolution of ML in spectroscopy—from early pattern recognition to the latest foundation models capable of advanced reasoning—and offer a taxonomy of representative neural architectures, including graph-based and transformer-based methods. Addressing key challenges such as data quality, multimodal integration, and computational scalability, we highlight emerging directions like synthetic data generation, large-scale pretraining, and few- or zero-shot learning. To foster reproducible research, we release an open-source repository containing curated datasets and code implementations. Our survey serves as a roadmap for researchers, guiding advancements at the intersection of spectroscopy and AI.",
        "location": "Montreal",
        "day": "August 20th",
        "hour": "10:00",
        "session": "Data Mining",
        "poster_positions": "From board n32 to board n34"
    },
    {
        "id": "8740",
        "title": "Image Captioning Evaluation in the Age of Multimodal LLMs: Challenges and Future Perspectives",
        "authors": "Sara Sarto, Marcella Cornia, Rita Cucchiara",
        "abstract": "The evaluation of machine-generated captions is a complex and evolving challenge. With the advent of Multimodal Large Language Models (MLLMs), image captioning has become a core task, increasing the need for robust and reliable evaluation metrics. This survey provides a comprehensive overview of advancements in image captioning evaluation, analyzing the evolution, strengths, and limitations of existing metrics. We assess these metrics across multiple dimensions, including correlation with human judgment, ranking accuracy, and sensitivity to hallucinations. Additionally, we explore the challenges posed by the longer and more detailed captions generated by MLLMs and examine the adaptability of current metrics to these stylistic variations. Our analysis highlights some limitations of standard evaluation approaches and suggests promising directions for future research in image captioning assessment. For a comprehensive overview of captioning evaluation refer to our project page available at https:\/\/github.com\/aimagelab\/awesome-captioning-evaluation.",
        "location": "Montreal",
        "day": "August 22nd",
        "hour": "11:30",
        "session": "CV: multimodal LLMs",
        "poster_positions": "From board n1 to board n2"
    },
    {
        "id": "9000",
        "title": "Deep Learning for Multivariate Time Series Imputation: A Survey",
        "authors": "Jun Wang, Wenjie Du, Yiyuan Yang, Linglong Qian, Wei Cao, Keli Zhang, Wenjia Wang, Yuxuan Liang, Qingsong Wen",
        "abstract": "Missing values are ubiquitous in multivariate time series (MTS) data, posing significant challenges for accurate analysis and downstream applications. In recent years, deep learning-based methods have successfully handled missing data by leveraging complex temporal dependencies and learned data distributions. In this survey, we provide a comprehensive summary of deep learning approaches for multivariate time series imputation (MTSI) tasks. We propose a novel taxonomy that categorizes existing methods based on two key perspectives: imputation uncertainty and neural network architecture. Furthermore, we summarize existing MTSI toolkits with a particular emphasis on the PyPOTS Ecosystem, which provides an integrated and standardized foundation for MTSI research. Finally, we discuss key challenges and future research directions, which give insight for further MTSI research. This survey aims to serve as a valuable resource for researchers and practitioners in the field of time series analysis and missing data imputation tasks. A well-maintained MTSI paper and tool list is available at https:\/\/github.com\/WenjieDu\/Awesome_Imputation.",
        "location": "Guangzhou",
        "day": "August 31st",
        "hour": "11:00",
        "session": "ML: Time series and data streams"
    },
    {
        "id": "8748",
        "title": "The Evolving Landscape of LLM- and VLM-Integrated Reinforcement Learning",
        "authors": "Sheila Schoepp, Masoud Jafaripour, Yingyue Cao, Tianpei Yang, Fatemeh Abdollahi, Shadan Golestan, Zahin Sufiyan, Osmar R. Zaiane, Matthew E. Taylor",
        "abstract": "Reinforcement learning (RL) has shown impressive results in sequential decision-making tasks. Large Language Models (LLMs) and Vision-Language Models (VLMs) have recently emerged, exhibiting impressive capabilities in multimodal understanding and reasoning. These advances have led to a surge of research integrating LLMs and VLMs into RL. This survey reviews representative works in which LLMs and VLMs are used to overcome key challenges in RL, such as lack of prior knowledge, long-horizon planning, and reward design. We present a taxonomy that categorizes these LLM\/VLM-assisted RL approaches into three roles: agent, planner, and reward. We conclude by exploring open problems, including grounding, bias mitigation, improved representations, and action advice. By consolidating existing research and identifying future directions, this survey establishes a framework for integrating LLMs and VLMs into RL, advancing approaches that unify natural language and visual understanding with sequential decision-making.",
        "location": "Montreal",
        "day": "August 21st",
        "hour": "15:00",
        "session": "ML: Reinforcement Learning (2\/2)",
        "poster_positions": "From board n27 to board n33"
    },
    {
        "id": "7388",
        "title": "Grounding Creativity in Physics: A Brief Survey of Physical Priors in AIGC",
        "authors": "Siwei Meng, Yawei Luo, Ping Liu",
        "abstract": "Recent advancements in AI-generated content have significantly improved the realism of 3D and 4D generation. However, most existing methods prioritize appearance consistency while neglecting underlying physical principles, leading to artifacts such as unrealistic deformations, unstable dynamics, and implausible object interactions. Incorporating physics priors into generative models has become a crucial research direction to enhance structural integrity and motion realism. This survey provides a review of physics-aware generative methods, systematically analyzing how physical constraints are integrated into 3D and 4D generation. First, we examine recent works in incorporating physical priors into static and dynamic 3D generation, categorizing methods based on representation types, including vision-based, NeRF-based, and Gaussian Splatting-based approaches. Second, we explore emerging techniques in 4D generation, focusing on methods that model temporal dynamics with physical simulations. Finally, we conduct a comparative analysis of major methods, highlighting their strengths, limitations, and suitability for different materials and motion dynamics. By presenting an in-depth analysis of physics-grounded AIGC, this survey aims to bridge the gap between generative models and physical realism, providing insights that inspire future research in physically consistent content generation.",
        "location": "Guangzhou",
        "day": "August 30th",
        "hour": "11:00",
        "session": "CV: Image and video synthesis and Generation"
    },
    {
        "id": "9152",
        "title": "Survey on Strategic Mining in Blockchain: A Reinforcement Learning Approach",
        "authors": "Jichen Li, Lijia Xie, Hanting Huang, Bo Zhou, Binfeng Song, Wanying Zeng, Xiaotie Deng, Xiao Zhang",
        "abstract": "Strategic mining attacks, such as selfish mining, exploit blockchain consensus protocols by deviating from honest behavior to maximize rewards. Markov Decision Process (MDP) analysis faces scalability challenges in modern digital economics, including blockchain. To address these limitations, reinforcement learning (RL) provides a scalable alternative, enabling adaptive strategy optimization in complex dynamic environments.\r\n\r\nIn this survey, we examine RL’s role in strategic mining analysis, comparing it to MDP-based approaches. We begin by reviewing foundational MDP models and their limitations, before exploring RL frameworks that can learn near-optimal strategies across various protocols. Building on this analysis, we compare RL techniques and their effectiveness in deriving security thresholds, such as the minimum attacker power required for profitable attacks. Expanding the discussion further, we classify consensus protocols and propose open challenges, such as multi-agent dynamics and real-world validation.\r\n\r\nThis survey highlights the potential of reinforcement learning to address the challenges of selfish mining, including protocol design, threat detection, and security analysis, while offering a strategic roadmap for researchers in decentralized systems and AI-driven analytics.",
        "location": "Guangzhou",
        "day": "August 30th",
        "hour": "15:00",
        "session": "Multidisciplinary Applications"
    },
    {
        "id": "8943",
        "title": "Grounding Open-Domain Knowledge from LLMs to Real-World Reinforcement Learning Tasks: A Survey",
        "authors": "Haiyan Yin, Hangwei Qian, Yaxin Shi, Ivor Tsang, Yew-Soon Ong",
        "abstract": "Grounding open-domain knowledge from large language models (LLMs) into real-world reinforcement learning (RL) tasks represents a transformative frontier in developing intelligent agents capable of advanced reasoning, adaptive planning, and robust decision-making in dynamic environments. In this paper, we introduce the LLM-RL Grounding Taxonomy, a systematic framework that categorizes emerging methods for integrating LLMs into RL systems by bridging their open-domain knowledge and reasoning capabilities with the task-specific dynamics, constraints, and objectives inherent to real-world RL environments. This taxonomy encompasses both training-free approaches, which leverage the zero-shot and few-shot generalization capabilities of LLMs without fine-tuning, and fine-tuning paradigms that adapt LLMs to environment-specific tasks for improved performance. We critically analyze these methodologies, highlight practical examples of effective knowledge grounding, and examine the challenges of alignment, generalization, and real-world deployment. Our work not only illustrates the potential of LLM-RL agents for enhanced decision-making, but also offers actionable insights for advancing the design of next-generation RL systems that integrate open-domain knowledge with adaptive learning.",
        "location": "Montreal",
        "day": "August 22nd",
        "hour": "11:30",
        "session": "Natural Language Processing (2\/2)",
        "poster_positions": "From board n8 to board n10"
    },
    {
        "id": "8684",
        "title": "Tensor Network: from the Perspective of AI4Science and Science4AI",
        "authors": "Junchi Yan, Yehui Tang, Xinyu Ye, Hao Xiong, Xiaoqiu Zhong, Yuhan Wang, Yuan Qi",
        "abstract": "Tensor network has been a promising numerical tool for computational problems across science and AI. For their emerging and fast development especially in the intersection between AI and science, this paper tries to present a compact review, regarding both their applications and their own recent technical development including open-source tools. Specifically, we make the observations that tensor network plays a functional role in matrix compression and representation, information fusion, as well as quantum-inspired algorithms, which can be generally regarded as Science4AI in our survey. On the other hand, there is an emerging line of research in tensor network in AI4Science especially like learning quantum many-body physics by using e.g. neural network quantum state. Importantly, we unify tensorization methodologies across classical and modern architectures, and particularly show how tensorization bridges low-order parameter spaces to high-dimensional representations without exponential parameter growth, and further point out their potential use in scientific computing. We conclude the paper with outlook for future trends.",
        "location": "Guangzhou",
        "day": "August 30th",
        "hour": "15:00",
        "session": "Machine Learning (2\/3)"
    },
    {
        "id": "8644",
        "title": "A Survey on Multi-View Knowledge Graph: Generation, Fusion, Applications and Future Directions",
        "authors": "Zihan Yang, Xiaohui Tao, Taotao Cai, Yifu Tang, Haoran Xie, Lin Li, Jianxin Li, Qing Li",
        "abstract": "Knowledge Graphs (KGs) have revolutionized structured knowledge representation, yet their capacity to model real-world complexity and heterogeneity remains fundamentally constrained. The emerging paradigm of Multi-View Knowledge Graphs (MVKGs) addresses this gap through multi-view learning, but existing research lacks systematic integration. This survey provides the first systematic consolidation of MVKG methodologies, with four pivotal contributions: 1) The first unified taxonomy of view generation paradigms that rigorously categorizes view into four types: structure, semantic, representation, and knowledge & modality; 2) A novel methodological typology for view fusion that systematically classifies techniques by fusion targets (feature, decision, and hybrid); 3) Task-centric application mapping that bridges theoretical MVKG constructs to node\/link\/graph-level downstream tasks; 4) A forward-looking roadmap identifying underexplored challenges. By unifying fragmented methodologies and formalizing MVKG design principles, this survey serves as a roadmap for advancing KG versatility in complex AI-driven scenarios. In doing so, it paves the way for more efficient knowledge integration, enhanced decision-making, and cross-domain learning in real-world applications.",
        "location": "Montreal",
        "day": "August 21st",
        "hour": "15:00",
        "session": "DM: Graph Data Mining",
        "poster_positions": "From board n39 to board n42"
    },
    {
        "id": "8549",
        "title": "A Survey of Pathology Foundation Model: Progress and Future Directions",
        "authors": "Conghao Xiong, Hao Chen, Joseph J. Y. Sung",
        "abstract": "Computational pathology, which involves analyzing whole slide images for automated cancer diagnosis, relies on multiple instance learning, where performance depends heavily on the feature extractor and aggregator. Recent Pathology Foundation Models (PFMs), pretrained on large-scale histopathology data, have significantly enhanced both the extractor and aggregator, but they lack a systematic analysis framework. In this survey, we present a hierarchical taxonomy organizing PFMs through a top-down philosophy applicable to foundation model analysis in any domain: model scope, model pretraining, and model design. Additionally, we systematically categorize PFM evaluation tasks into slide-level, patch-level, multimodal, and biological tasks, providing comprehensive benchmarking criteria. Our analysis identifies critical challenges in both PFM development (pathology-specific methodology, end-to-end pretraining, data-model scalability) and utilization (effective adaptation, model maintenance), paving the way for future directions in this promising field. Resources referenced in this survey are available at https:\/\/github.com\/BearCleverProud\/AwesomeWSI.",
        "location": "Guangzhou",
        "day": "August 30th",
        "hour": "09:40",
        "session": "CV: Biomedical image analysis"
    },
    {
        "id": "8835",
        "title": "A Survey on the Feedback Mechanism of LLM-based AI Agents",
        "authors": "Zhipeng Liu, Xuefeng Bai, Kehai Chen, Xinyang Chen, Xiucheng Li, Yang Xiang, Jin Liu, Hong-Dong Li, Yaowei Wang, Liqiang Nie, Min Zhang",
        "abstract": "Large language models (LLMs) are increasingly being adopted to develop general-purpose AI agents. However, it remains challenging for these LLM-based AI agents to efficiently learn from feedback and iteratively optimize their strategies. To address this challenge, tremendous efforts have been dedicated to designing diverse feedback mechanisms for LLM-based AI agents. To provide a comprehensive overview of this rapidly evolving field, this paper presents a systematic review of these studies, offering a holistic perspective on the feedback mechanisms in LLM-based AI agents. We begin by discussing the construction of LLM-based AI agents, introducing a generalized framework that encapsulates much of the existing work. Next, we delve into the exploration of feedback mechanisms, categorizing them into four distinct types: internal feedback, external feedback, multi-agent feedback, and human feedback. Additionally, we provide an overview of evaluation protocols and benchmarks specifically tailored for LLM-based AI agents. Finally, we highlight the significant challenges and identify potential directions for future studies. The relevant papers are summarized and will be consistently updated at https:\/\/github.com\/kevinson7515\/Agents-Feedback-Mechanisms.",
        "location": "Guangzhou",
        "day": "August 30th",
        "hour": "15:00",
        "session": "ML: LLM and VLM"
    },
    {
        "id": "8879",
        "title": "How to Enable LLM with 3D Capacity? A Survey of Spatial Reasoning in LLM",
        "authors": "Jirong Zha, Yuxuan Fan, Xiao Yang, Chen Gao, Xinlei Chen",
        "abstract": "3D spatial understanding is essential in real-world applications such as robotics, autonomous vehicles, virtual reality, and medical imaging. Recently, Large Language Models (LLMs), having demonstrated remarkable success across various domains, have been leveraged to enhance 3D understanding tasks, showing potential to surpass traditional computer vision methods. In this survey, we present a comprehensive review of methods integrating LLMs with 3D spatial understanding. We propose a taxonomy that categorizes existing methods into three branches: image-based methods deriving 3D understanding from 2D visual data, point cloud-based methods working directly with 3D representations, and hybrid modality-based methods combining multiple data streams. We systematically review representative methods along these categories, covering data representations, architectural modifications, and training strategies that bridge textual and 3D modalities. Finally, we discuss current limitations, including dataset scarcity and computational challenges, while highlighting promising research directions in spatial perception, multi-modal fusion, and real-world applications.",
        "location": "Guangzhou",
        "day": "August 31st",
        "hour": "14:45",
        "session": "CV: LLMs and Transformers"
    },
    {
        "id": "9246",
        "title": "Reward Models in Deep Reinforcement Learning: A Survey",
        "authors": "Rui Yu, Shenghua Wan, Yucen Wang, Chen-Xiao Gao, Le Gan, Zongzhang Zhang, De-Chuan Zhan",
        "abstract": "In reinforcement learning (RL), agents continually interact with the environment and use the feedback to refine their behavior. To guide policy optimization, reward models are introduced as proxies of the desired objectives, such that when the agent maximizes the accumulated reward, it also fulfills the task designer's intentions. Recently, significant attention from both academic and industrial researchers has focused on developing reward models that not only align closely with the true objectives but also facilitate policy optimization. In this survey, we provide a comprehensive review of reward modeling techniques within the RL literature. We begin by outlining the background and preliminaries in reward modeling. Next, we present an overview of recent reward modeling approaches, categorizing them based on the source, the mechanism, and the reward learning paradigm. Building on this understanding, we discuss various applications of these reward modeling techniques and review methods for evaluating reward models. Finally, we conclude by highlighting promising research directions in reward modeling. Altogether, this survey includes both established and emerging methods, filling the vacancy of a systematic review of reward models in current literature.",
        "location": "Guangzhou",
        "day": "August 31st",
        "hour": "14:45",
        "session": "ML: Reinforcement learning (2\/2)"
    },
    {
        "id": "8902",
        "title": "A Comprehensive and Systematic Review for Deep Learning-Based De Novo Peptide Sequencing",
        "authors": "Jun Xia, Jingbo Zhou, Shaorong Chen, Tianze Ling, Stan Z. Li",
        "abstract": "Tandem mass spectrometry (MS\/MS) has revolutionized the field of proteomics, enabling the high-throughput identification of proteins. However, one of the central challenges in mass spectrometry-based proteomics remains peptide identification, especially in the absence of a comprehensive peptide database. While traditional database search methods compare observed mass spectra to pre-existing protein databases, they are limited by the availability and completeness of these databases. \\emph{De novo} peptide sequencing, which derives peptide sequences directly from mass spectra, has emerged as a crucial approach in such cases. In recent years, deep learning has made significant strides in this domain. These methods train deep neural networks for translating mass spectra into peptide sequences without relying on any pre-constructed databases. Despite significant progress, this field still lacks a comprehensive and systematic review. In this paper, we provide the first review of deep learning-based \\emph{de novo} peptide sequencing techniques from the perspectives of data types, model architectures, decoding strategies, applications and evaluation metrics. We also identify key challenges and highlight promising avenues for future research, providing a valuable resource for the AI and scientific communities.",
        "location": "Guangzhou",
        "day": "August 31st",
        "hour": "14:45",
        "session": "MTA: Bioinformatics"
    },
    {
        "id": "4603",
        "title": "A Survey of Structural Entropy: Theory, Methods, and Applications",
        "authors": "Dingli Su, Hao Peng, Yicheng Pan, Angsheng Li",
        "abstract": "Classical information theory, a cornerstone of artificial intelligence, is fundamentally limited by its local perspective, often analyzing pairwise interactions while ignoring the larger, hierarchical architecture of complex systems. Structural entropy (SE) presents a paradigm shift, extending Shannon entropy to quantify information on a global scale and measure the uncertainty embedded in a system's organizational hierarchy. Although its applications have broadened significantly from its origins in community detection across diverse AI domains, a systematic synthesis of its theory, computational methods, and applications is currently lacking.\r\nThis survey provides a comprehensive overview of SE to fill this critical void in the literature. We offer a detailed examination of its theoretical foundations, computational frameworks, and key learning paradigms, with a focus on its integration with graph learning and reinforcement learning. Through an exploration of its diverse applications, we highlight the power of SE to advance graph-based analysis and modeling. Finally, we discuss key challenges and future research opportunities for incorporating SE principles into the development of more interpretable and theoretically grounded AI systems.",
        "location": "Guangzhou",
        "day": "August 30th",
        "hour": "11:00",
        "session": "Data Mining"
    },
    {
        "id": "8880",
        "title": "Game Theory Meets Large Language Models: A Systematic Survey",
        "authors": "Haoran Sun, Yusen Wu, Yukun Cheng, Xu Chu",
        "abstract": "Game theory establishes a fundamental framework for analyzing strategic interactions among rational decision-makers. The rapid advancement of large language models (LLMs) has sparked extensive research exploring the intersection of these two fields. Specifically, game-theoretic methods are being applied to evaluate and enhance LLM capabilities, while LLMs themselves are reshaping classic game models. This paper presents a comprehensive survey of the intersection of these fields, exploring a bidirectional relationship from three perspectives: (1) Establishing standardized game-based benchmarks for evaluating LLM behavior; (2) Leveraging game-theoretic methods to improve LLM performance through algorithmic innovations; (3) Characterizing the societal impacts of LLMs through game modeling. Among these three aspects, we also highlight how the equilibrium analysis for traditional game models is impacted by LLMs' advanced language understanding, which in turn extends the study of game theory. Finally, we identify key challenges and future research directions, assessing their feasibility based on the current state of the field. By bridging theoretical rigor with emerging AI capabilities, this survey aims to foster interdisciplinary collaboration and drive progress in this evolving research area.",
        "location": "Montreal",
        "day": "August 20th",
        "hour": "14:00",
        "session": "Game Theory",
        "poster_positions": "From board n59 to board n64"
    },
    {
        "id": "8933",
        "title": "Neuro-Symbolic Artificial Intelligence: A Task-Directed Survey in the Black-Box Models Era",
        "authors": "Giovanni Pio Delvecchio, Lorenzo Molfetta, Gianluca Moro",
        "abstract": "The integration of symbolic computing with neural networks has intrigued researchers since the first theorizations of Artificial intelligence (AI). The ability of Neuro-Symbolic (NeSy) methods to infer or exploit behavioral schema has been widely considered as one of the possible proxies for human-level intelligence. However, the limited semantic generalizability and the challenges in declining complex domains with pre-defined patterns and rules hinder their practical implementation in real-world scenarios. The unprecedented results achieved by connectionist systems since the last AI breakthrough in 2017 have raised questions about the competitiveness of NeSy solutions, with particular emphasis on the Natural Language Processing and Computer Vision fields. This survey examines task-specific advancements in the NeSy domain to explore how incorporating symbolic systems can enhance explainability and reasoning capabilities. Our findings are meant to serve as a resource for researchers exploring explainable NeSy methodologies for real-life tasks and applications. Reproducibility details and in-depth comments on each surveyed research work are made available at https:\/\/github.com\/disi-unibo-nlp\/task-oriented-neuro-symbolic.git.",
        "location": "Montreal",
        "day": "August 21st",
        "hour": "11:30",
        "session": "ML: Neurosymbolic AI",
        "poster_positions": "From board n22 to board n26"
    },
    {
        "id": "8905",
        "title": "Neuro-Symbolic Artificial Intelligence: Towards Improving the Reasoning Abilities of Large Language Models",
        "authors": "Xiao-Wen Yang, Jie-Jing Shao, Lan-Zhe Guo, Bo-Wen Zhang, Zhi Zhou, Lin-Han Jia, Wang-Zhou Dai, Yu-Feng Li",
        "abstract": "Large Language Models (LLMs) have shown promising results across various tasks, yet their reasoning capabilities remain a fundamental challenge. Developing AI systems with strong reasoning capabilities is regarded as a crucial milestone in the pursuit of Artificial General Intelligence (AGI) and has garnered considerable attention from both academia and industry. Various techniques have been explored to enhance the reasoning capabilities of LLMs, with neuro-symbolic approaches being a particularly promising way. This paper comprehensively reviews recent developments in neuro-symbolic approaches for enhancing LLM reasoning. We first present a formalization of reasoning tasks and give a brief introduction to the neuro-symbolic learning paradigm. Then, we discuss neuro-symbolic methods for improving the reasoning capabilities of LLMs from three perspectives: Symbolic->LLM, LLM->Symbolic, and LLM+Symbolic. Finally, we discuss several key challenges and promising future directions. We have also released a GitHub repository including papers and resources related to this survey: https:\/\/github.com\/LAMDASZ-ML\/Awesome-LLM-Reasoning-with-NeSy.",
        "location": "Guangzhou",
        "day": "August 30th",
        "hour": "15:00",
        "session": "ML: LLM and VLM"
    },
    {
        "id": "8886",
        "title": "A Unifying Perspective on Model Reuse: From Small to Large Pre-Trained Models",
        "authors": "Da-Wei Zhou, Han-Jia Ye",
        "abstract": "Machine learning has rapidly progressed, resulting in a vast repository of both general and specialized models that address diverse practical needs. Reusing pre-trained models (PTMs) from public model zoos has emerged as an effective strategy, leveraging rich model resources and reshaping traditional machine learning workflows. These PTMs encapsulate valuable inductive biases beneficial for downstream tasks. Well-designed reuse strategies enable models to be adapted beyond their original scope, enhancing both performance and efficiency in target machine learning systems. This survey offers a unifying perspective on model reuse, establishing connections across various domains and presenting a novel taxonomy that encompasses the full lifecycle of PTM utilization---including selection from model zoos, adaptation techniques, and related areas such as model representation learning. We delve into the similarities and distinctions between reusing specialized and general PTMs, providing insights into their respective advantages and limitations. Furthermore, we discuss key challenges, emerging trends, and future directions in model reuse, aiming to guide research and practice in the era of large-scale pre-trained models. A comprehensive list of papers about model reuse is available at https:\/\/github.com\/LAMDA-Model-Reuse\/Awesome-Model-Reuse.",
        "location": "Guangzhou",
        "day": "August 30th",
        "hour": "15:00",
        "session": "ML: LLM and VLM"
    },
    {
        "id": "8553",
        "title": "Human-Centric Foundation Models: Perception, Generation and Agentic Modeling",
        "authors": "Shixiang Tang, Yizhou Wang, Lu Chen, Yuan Wang, Sida Peng, Dan Xu, Wanli Ouyang",
        "abstract": "Human understanding and generation are critical for modeling digital humans and humanoid embodiments. Recently, Human-centric Foundation Models (HcFMs)—inspired by the success of generalist models such as large language and vision models—have emerged to unify diverse human-centric tasks into a single framework, surpassing traditional task-specific approaches. In this survey, we present a comprehensive overview of HcFMs by proposing a taxonomy that categorizes current approaches into four groups: (1) Human-centric Perception Foundation Models that capture fine-grained features for multi-modal 2D and 3D understanding; (2) Human-centric AIGC Foundation Models that generate high-fidelity, diverse human-related content; (3) Unified Perception and Generation Models that integrate these capabilities to enhance both human understanding and synthesis; and (4) Human-centric Agentic Foundation Models that extend beyond perception and generation to learn human-like intelligence and interactive behaviors for humanoid embodied tasks. We review state-of-the-art techniques, discuss emerging challenges and future research directions. This survey aims to serve as a roadmap for researchers and practitioners working towards more robust, versatile, and intelligent digital human and embodiments modeling. Website is https:\/\/github.com\/HumanCentricModels\/Awesome-Human-Centric-Foundation-Models\/",
        "location": "Montreal",
        "day": "August 21st",
        "hour": "10:00",
        "session": "Humans and AI",
        "poster_positions": "From board n97 to board n101"
    },
    {
        "id": "9114",
        "title": "Federated Learning at the Forefront of Fairness: A Multifaceted Perspective",
        "authors": "Noorain Mukhtiar, Adnan Mahmood, Yipeng Zhou, Jian Yang, Jing Teng, Quan Z. Sheng",
        "abstract": "Fairness in Federated Learning (FL) is emerging as a critical factor driven by heterogeneous clients’ constraints and balanced model performance across various scenarios. In this survey, we delineate a comprehensive classification of the state-of-the-art fairness-aware approaches from a multifaceted perspective, i.e., model performance-oriented and capability-oriented. Moreover, we provide a framework to categorize and address various fairness concerns and associated technical aspects, examining their effectiveness in balancing equity and performance within FL frameworks. We further examine several significant evaluation metrics leveraged to measure fairness quantitatively. Finally, we explore exciting open research directions and propose prospective solutions that could drive future advancements in this important area, laying a solid foundation for researchers working toward fairness in FL.",
        "location": "Montreal",
        "day": "August 19th",
        "hour": "15:00",
        "session": "ML: Federated Learning",
        "poster_positions": "From board n42 to board n46"
    },
    {
        "id": "9107",
        "title": "Emerging Advances in Learned Video Compression: Models, Systems and Beyond",
        "authors": "Chuanmin Jia, Feng Ye, Siwei Ma, Wen Gao, Huifang Sun, Leonardo Chiariglione",
        "abstract": "Video compression is a fundamental topic in visual intelligence, bridging visual signal sensing\/capturing and high-level visual analytics. The broad success of artificial intelligence (AI) technology has enriched the horizon of video compression into novel paradigms by leveraging end-to-end optimized neural models. In this survey, we first provide a comprehensive and systematic overview of recent literature on end-to-end optimized learned video coding, covering the spectrum of pioneering efforts in both uni-directional and bi-directional prediction based compression model designation. We further delve into the optimization techniques employed in learned video compression (LVC), emphasizing their technical innovations and advantages. Some standardization progress is also reported. Furthermore, we investigate the system design and hardware implementation challenges of the LVC inclusively. Finally, we present the extensive simulation results to demonstrate the superior compression performance of LVC models, addressing the question of why learned codecs and AI-based video technology would have a broad impact on future visual intelligence research.",
        "location": "Guangzhou",
        "day": "August 31st",
        "hour": "14:45",
        "session": "Machine Learning (3\/3)"
    },
    {
        "id": "8456",
        "title": "Control in Computational Social Choice",
        "authors": "Jiehua Chen, Joanna Kaczmarek, Paul Nüsken, Jörg Rothe, Ildikó Schlotter, Tessa Seeger",
        "abstract": "We survey the notion of control in various areas of computational social choice (COMSOC) such as voting, fair allocation, cooperative game theory, matching under preferences, and group identification. In all these scenarios, control can be exerted, for instance, by adding or deleting agents with the goal of influencing the outcome. We conclude by briefly covering control in some other COMSOC areas including participatory budgeting, judgment aggregation, and opinion diffusion.",
        "location": "Montreal",
        "day": "August 21st",
        "hour": "15:00",
        "session": "GTEP: Computational social choice (2\/2)",
        "poster_positions": "From board n51 to board n57"
    },
    {
        "id": "8432",
        "title": "Connector-S: A Survey of Connectors in Multi-modal Large Language Models",
        "authors": "Xun Zhu, Zheng Zhang, Xi Chen, Yiming Shi, Miao Li, Ji Wu",
        "abstract": "With the rapid advancements in multi-modal large language models (MLLMs), connectors play a pivotal role in bridging diverse modalities and enhancing model performance. However, the design and evolution of connectors have not been comprehensively analyzed, leaving gaps in understanding how these components function and hindering the development of more powerful connectors. In this survey, we systematically review the current progress of connectors in MLLMs and present a structured taxonomy that categorizes connectors into atomic operations (mapping, compression, mixture of experts) and holistic designs (multi-layer, multi-encoder, multi-modal scenarios), highlighting their technical contributions and advancements. Furthermore, we discuss several promising research frontiers and challenges, including high-resolution input, dynamic compression, guide information selection, combination strategy, and interpretability. This survey is intended to serve as a foundational reference and a clear roadmap for researchers, providing valuable insights into the design and optimization of next-generation connectors to enhance the performance and adaptability of MLLMs.",
        "location": "Guangzhou",
        "day": "August 31st",
        "hour": "11:00",
        "session": "ML: Reinforcement learning (1\/2)"
    },
    {
        "id": "9136",
        "title": "Words Over Pixels? Rethinking Vision in Multimodal Large Language Models",
        "authors": "Anubhooti Jain, Mayank Vatsa, Richa Singh",
        "abstract": "Multimodal Large Language Models (MLLMs) promise seamless integration of vision and language understanding. However, despite their strong performance, recent studies reveal that MLLMs often fail to effectively utilize visual information, frequently relying on textual cues instead. This survey provides a comprehensive analysis of the vision component in MLLMs, covering both application-level and architectural aspects. We investigate critical challenges such as weak spatial reasoning, poor fine-grained visual perception, and suboptimal fusion of visual and textual modalities. Additionally, we explore limitations in current vision encoders, benchmark inconsistencies, and their implications for downstream tasks. By synthesizing recent advancements, we highlight key research opportunities to enhance visual understanding, improve cross-modal alignment, and develop more robust and efficient MLLMs. Our observations emphasize the urgent need to elevate vision to an equal footing with language, paving the path for more reliable and perceptually aware multimodal models.",
        "location": "Montreal",
        "day": "August 22nd",
        "hour": "11:30",
        "session": "CV: multimodal LLMs",
        "poster_positions": "From board n1 to board n2"
    },
    {
        "id": "5438",
        "title": "A Survey of Optimization Modeling Meets LLMs: Progress and Future Directions",
        "authors": "Ziyang Xiao, Jingrong Xie, Lilin Xu, Shisi Guan, Jingyan Zhu, Xiongwei Han, Xiaojin Fu, WingYin Yu, Han Wu, Wei Shi, Qingcan Kang, Jiahui Duan, Tao Zhong, Mingxuan Yuan, Jia Zeng, Yuan Wang, Gang Chen, Dongxiang Zhang",
        "abstract": "By virtue of its great utility in solving real-world problems, optimization modeling has been widely employed for optimal decision-making across various sectors, but it requires substantial expertise from operations research professionals. With the advent of large language models (LLMs), new opportunities have emerged to automate the procedure of mathematical modeling. This survey presents a comprehensive and timely review of recent advancements that cover the entire technical stack, including data synthesis and fine-tuning for the base model,  inference frameworks, benchmark datasets, and performance evaluation. In addition, we conducted an in-depth analysis on the quality of benchmark datasets, which was found to have a surprisingly high error rate.  We cleaned the datasets and constructed a new leaderboard with fair performance evaluation in terms of base LLM model and datasets. We also build an online portal that integrates resources of cleaned datasets, code and paper repository to benefit the community.  Finally, we identify  limitations in current methodologies and outline future research opportunities.",
        "location": "Guangzhou",
        "day": "August 30th",
        "hour": "11:00",
        "session": "NLP: LLM"
    },
    {
        "id": "8251",
        "title": "One-shot Federated Learning Methods: A Practical Guide",
        "authors": "Xiang Liu, Zhenheng Tang, Xia Li, Yijun Song, Sijie Ji, Zemin Liu, Bo Han, Linshan Jiang, Jialin Li",
        "abstract": "One-shot Federated Learning (OFL) is a distributed machine learning paradigm that constrains client-server communication to a single round, addressing privacy and communication overhead issues associated with multiple rounds of data exchange in traditional Federated Learning (FL). OFL demonstrates the practical potential for integration with future approaches that require collaborative training models, such as large language models (LLMs). However, current OFL methods face two major challenges: data heterogeneity and model heterogeneity, which result in subpar performance compared to conventional FL methods. Worse still, despite numerous studies addressing these limitations, a comprehensive summary is still lacking. To address these gaps, this paper presents a systematic analysis of the challenges faced by OFL and thoroughly reviews the current methods. We also offer an innovative categorization method and analyze the trade-offs of various techniques. Additionally, we discuss the most promising future directions and the technologies that should be integrated into the OFL field. This work aims to provide guidance and insights for future research.",
        "location": "Guangzhou",
        "day": "August 31st",
        "hour": "14:45",
        "session": "ML: Federated learning (2\/2)"
    },
    {
        "id": "8557",
        "title": "RenderBender: A Survey on Adversarial Attacks Using Differentiable Rendering",
        "authors": "Matthew Hull, Haoran Wang, Matthew Lau, Alec Helbling, Mansi Phute, Chao Zhang, Zsolt Kira, Willian Lunardi, Martin Andreoni, Wenke Lee, Duen Horng Chau",
        "abstract": "Differentiable rendering techniques like Gaussian Splatting and Neural Radiance Fields have become powerful tools for generating high-fidelity models of 3D objects and scenes. Their ability to produce both physically plausible and differentiable models of scenes is a key ingredient needed to produce physically plausible adversarial attacks on DNNs.  However, the adversarial machine learning community has yet to fully explore these capabilities, partly due to differing attack goals (e.g., misclassification, misdetection) and a wide range of possible scene manipulations used to achieve them (e.g., alter texture, mesh). This survey contributes a framework that unifies diverse goals and tasks, facilitating easy comparison of existing work, identifying research gaps, and highlighting future directions—ranging from expanding attack goals and tasks to account for new modalities, state-of-the-art models, tools, and pipelines, to underscoring the importance of studying real-world threats in complex scenes.",
        "location": "Montreal",
        "day": "August 21st",
        "hour": "10:00",
        "session": "CV: attacks",
        "poster_positions": "From board n4 to board n6"
    },
    {
        "id": "8993",
        "title": "40 Years of Research in Possibilistic Logic – a Survey",
        "authors": "Didier Dubois, Henri Prade",
        "abstract": "Possibilistic logic is forty years old. Possibilistic logic is a logic that handles classical logic formulas\r\nassociated with weights taking values in a linearly ordered set or more generally in a lattice. Over the\r\ndecades, possibilistic logic has undergone numerous developments at both theoretical and applied\r\nlevels. The ambition of this article is to review all these developments while exposing the main ideas\r\nbehind them.",
        "location": "Montreal",
        "day": "August 20th",
        "hour": "14:00",
        "session": "KR: Logic",
        "poster_positions": "From board n38 to board n44"
    },
    {
        "id": "8412",
        "title": "Generative AI for Immersive Video: Recent Advances and Future Opportunities",
        "authors": "Kaiyuan Hu, Yili Jin, Hao Zhou, Linfeng Du, Jiangchuan Liu, Xue Liu",
        "abstract": "Immersive video serves as a key component of eXtended Reality (XR) that aims to create and interact with simulated virtual or hybrid environments. Such a technology allows users to experience immersive sensations that transcend time and space, and meanwhile continuously providing training data for emerging technologies like Embodied AI. Thanks to the advancements in sensing, computing, and display, recent years have witnessed many excellent works for XR and related hardware or software systems. However, challenges like high creation cost, lack of immersion, and limited scalability hinder the practical application of immersive video services. Whilst recently emerged generative artificial intelligence (GenAI) provides us with new insights in tackling existing challenges. In this paper, we conduct a comprehensive survey into the recent advances and future opportunities on how GenAI can benefit immersive video services. By introducing a systematic taxonomy, we meticulously classify the pertinent techniques and applications into three well-defined categories aligned with the pipeline of immersive video service: content creation, network delivery, and client-side display. This categorization enables a structured exploration of the diverse roles on how GenAI can benefit immersive video service, providing a framework for a more comprehensive understanding and evaluation of these technologies. To the best of our knowledge, this work is the first systematic survey of GenAI in XR settings, laying a foundation for future research in this interdisciplinary domain.",
        "location": "Montreal",
        "day": "August 21st",
        "hour": "15:00",
        "session": "CV: videos",
        "poster_positions": "From board n1 to board n3"
    },
    {
        "id": "8711",
        "title": "A Survey on Model Repair in AI Planning",
        "authors": "Pascal Bercher, Sarath Sreedharan, Mauro Vallati",
        "abstract": "Accurate planning models are a prerequisite for the appropriate functioning of AI planning applications. Creating these models is, however, a tedious and error-prone task -- even for planning experts. This makes the provision of automated modeling support essential. In this work, we differentiate between approaches that learn models from scratch (called domain model acquisition) and those that repair flawed or incomplete ones. We survey approaches for the latter, including those that can be used for domain repair but have been developed for other applications, discuss possible optimization metrics (i.e., which repaired model to aim at), and conclude with lines of research we believe deserve more attention.",
        "location": "Montreal",
        "day": "August 19th",
        "hour": "15:00",
        "session": "Planning and Scheduling (2\/5)",
        "poster_positions": "From board n74 to board n80"
    },
    {
        "id": "4460",
        "title": "A Survey on Temporal Interaction Graph Representation Learning: Progress, Challenges, and Opportunities",
        "authors": "Pengfei Jiao, Hongjiang Chen, Xuan Guo, Zhidong Zhao, Dongxiao He, Di Jin",
        "abstract": "Temporal interaction graphs (TIGs), defined by sequences of timestamped interaction events, have become ubiquitous in real-world applications due to their capability to model complex dynamic system behaviors. As a result, temporal interaction graph representation learning (TIGRL) has garnered significant attention in recent years. TIGRL aims to embed nodes in TIGs into low-dimensional representations that effectively preserve both structural and temporal information, thereby enhancing the performance of downstream tasks such as classification, prediction, and clustering within constantly evolving data environments. In this paper, we begin by introducing the foundational concepts of TIGs and emphasizing the critical role of temporal dependencies. We then propose a comprehensive taxonomy of state-of-the-art TIGRL methods, systematically categorizing them based on the types of information utilized during the learning process to address the unique challenges inherent to TIGs. To facilitate further research and practical applications, we curate the source of datasets and benchmarks, providing valuable resources for empirical investigations. Finally, we examine key open challenges and explore promising research directions in TIGRL, laying the groundwork for future advancements that have the potential to shape the evolution of this field.",
        "location": "Guangzhou",
        "day": "August 31st",
        "hour": "11:00",
        "session": "DM: Mining graphs (3\/3)"
    },
    {
        "id": "9158",
        "title": "Evaluation of Medical Large Language Models: Taxonomy, Review, and Directions",
        "authors": "Anisio Lacerda, Gisele Pappa, Adriano César Machado Pereira, Wagner Meira Jr, Alexandre Guimarães de Almeida Barros",
        "abstract": "The integration of Large Language Models (LLMs) into medicine presents both great opportunities and significant challenges, particularly in ensuring these models are accurate, reliable, and safe. While LLMs have shown impressive capabilities in understanding and generating human language, their application in the medical domain requires careful evaluation due to the critical nature of medical applications which are inherently linked to patient life and health.  Current evaluations of LLMs in medicine are often fragmented and insufficient, with a lack of standardized performance metrics, limited use of real patient data, and insufficient attention to important applications, such as documentation, education, and research. Furthermore, traditional NLP-based evaluations are often inadequate for assessing the text generated by LLMs.  Therefore, a robust evaluation is essential to ensure the responsible and effective use of LLMs in medical settings, and to address the inherent challenges associated with their implementation. This paper explores the various dimensions of LLM evaluation in the medical domain, proposes a new taxonomy for categorizing medical applications, and discusses directions for future research in this critical area.",
        "location": "Montreal",
        "day": "August 21st",
        "hour": "15:00",
        "session": "ML: Large Language Models",
        "poster_positions": "From board n15 to board n21"
    },
    {
        "id": "9226",
        "title": "Empowering LLMs with Logical Reasoning: A Comprehensive Survey",
        "authors": "Fengxiang Cheng, Haoxuan Li, Fenrong Liu, Robert van Rooij, Kun Zhang, Zhouchen Lin",
        "abstract": "Large language models (LLMs) have achieved remarkable successes on various tasks. However, recent studies have found that there are still significant challenges to the logical reasoning abilities of LLMs, which can be categorized into the following two aspects: (1) Logical question answering: LLMs often fail to generate the correct answer within a complex logical problem which requires sophisticated deductive, inductive or abductive reasoning given a collection of premises and constrains. (2) Logical consistency: LLMs are prone to producing responses contradicting themselves across different questions. For example, a state-of-the-art question-answering LLM Macaw, answers Yes to both questions Is a magpie a bird? and Does a bird have wings? but answers No to Does a magpie have wings?. To facilitate this research direction, we comprehensively investigate the most cutting-edge methods and propose a detailed taxonomy. Specifically, to accurately answer complex logic questions, previous methods can be categorized based on reliance on external solvers, prompts, and fine-tuning. To avoid logical contradictions, we discuss concepts and solutions of various logical consistencies, including implication, negation, transitivity, factuality consistencies, and their composites. In addition, we review commonly used benchmark datasets and evaluation metrics, and discuss promising research directions, such as extending to modal logic to account for uncertainty and developing efficient algorithms that simultaneously satisfy multiple logical consistencies.",
        "location": "Montreal",
        "day": "August 20th",
        "hour": "14:00",
        "session": "KR: Logic",
        "poster_positions": "From board n38 to board n44"
    },
    {
        "id": "8788",
        "title": "Large Language Models for Causal Discovery: Current Landscape and Future Directions",
        "authors": "Guangya Wan, Yunsheng Lu, Yuqi Wu, Mengxuan Hu, Sheng Li",
        "abstract": "Causal discovery (CD) and Large Language Models (LLMs) have emerged as transformative fields in artificial intelligence that have evolved largely independently. While CD specializes in uncovering cause-effect relationships from data, and LLMs excel at natural language processing and generation, their integration presents unique opportunities for advancing causal understanding. This survey examines how LLMs are transforming CD across three key dimensions: direct causal extraction from text, integration of domain knowledge into statistical methods, and refinement of causal structures. We systematically analyze approaches that leverage LLMs for CD tasks, highlighting their innovative use of metadata and natural language for causal inference. Our analysis reveals both LLMs' potential to enhance traditional CD methods and their current limitations as imperfect expert systems. We identify key research gaps, outline evaluation frameworks and benchmarks for LLM-based causal discovery, and advocate future research efforts for leveraging LLMs in causality research. As the first comprehensive examination of the synergy between LLMs and CD, this work lays the groundwork for future advances in the field.",
        "location": "Guangzhou",
        "day": "August 30th",
        "hour": "11:00",
        "session": "ML: Causality and Diffusion Models"
    },
    {
        "id": "8361",
        "title": "Federated Low-Rank Adaptation for Foundation Models: A Survey",
        "authors": "Yiyuan Yang, Guodong Long, Qinghua Lu, Liming Zhu, Jing Jiang, Chengqi Zhang",
        "abstract": "Effectively leveraging private datasets remains a significant challenge in developing foundation models. Federated Learning (FL) has recently emerged as a collaborative framework that enables multiple users to fine-tune these models while mitigating data privacy risks.  Meanwhile, Low-Rank Adaptation (LoRA) offers a resource-efficient alternative for fine-tuning foundation models by dramatically reducing the number of trainable parameters. This survey examines how LoRA has been integrated into federated fine-tuning for foundation models—an area we term FedLoRA—by focusing on three key challenges: distributed learning, heterogeneity, and efficiency. We further categorize existing work based on the specific methods used to address each challenge. Finally, we discuss open research questions and highlight promising directions for future investigation, outlining the next steps for advancing FedLoRA.",
        "location": "Guangzhou",
        "day": "August 31st",
        "hour": "14:45",
        "session": "ML: Federated learning (2\/2)"
    },
    {
        "id": "8640",
        "title": "Understanding PII Leakage in Large Language Models: A Systematic Survey",
        "authors": "Shuai Cheng, Zhao Li, Shu Meng, Mengxia Ren, Haitao Xu, Shuai Hao, Chuan Yue, Fan Zhang",
        "abstract": "Large Language Models (LLMs) have demonstrated exceptional success across a variety of tasks, particularly in natural language processing, leading to their growing integration into numerous facets of daily life. However, this widespread deployment has raised substantial privacy concerns, especially regarding personally identifiable information (PII), which can be directly associated with specific individuals. The leakage of such information presents significant real-world privacy threats. In this paper, we conduct a systematic investigation into existing research on PII leakage in LLMs, encompassing commonly utilized PII datasets, evaluation metrics, and current studies on both PII leakage attacks and defensive strategies. Finally, we identify unresolved challenges in the current research landscape and suggest future research directions.",
        "location": "Guangzhou",
        "day": "August 31st",
        "hour": "09:40",
        "session": "MTA: Security and privacy"
    },
    {
        "id": "9071",
        "title": "Comprehensive Review of Neural Differential Equations for Time Series Analysis",
        "authors": "YongKyung Oh, Seungsu Kam, Jonghun Lee, Dong-Young Lim, Sungil Kim, Alex A. T. Bui",
        "abstract": "Time series modeling and analysis have become critical in various domains. Conventional methods such as RNNs and Transformers, while effective for discrete-time and regularly sampled data, face significant challenges in capturing the continuous dynamics and irregular sampling patterns inherent in real-world scenarios. Neural Differential Equations (NDEs) represent a paradigm shift by combining the flexibility of neural networks with the mathematical rigor of differential equations. This paper presents a comprehensive review of NDE-based methods for time series analysis, including neural ordinary differential equations, neural controlled differential equations, and neural stochastic differential equations. We provide a detailed discussion of their mathematical formulations, numerical methods, and applications, highlighting their ability to model continuous-time dynamics. Furthermore, we address key challenges and future research directions. This survey serves as a foundation for researchers and practitioners seeking to leverage NDEs for advanced time series analysis.",
        "location": "Montreal",
        "day": "August 20th",
        "hour": "14:00",
        "session": "ML: time series, sequences and signals",
        "poster_positions": "From board n6 to board n11"
    },
    {
        "id": "8721",
        "title": "Graph Neural Networks for Databases: A Survey",
        "authors": "Ziming Li, Youhuan Li, Yuyu Luo, Guoliang Li, Chuxu Zhang",
        "abstract": "Graph neural networks (GNNs) are powerful deep learning models for graph-structured data, demonstrating remarkable success across diverse domains. Recently, the database (DB) community has increasingly recognized the potential of GNNs, prompting a surge of research focusing on improving database systems through GNN-based approaches. However, despite notable advances, there is a lack of a comprehensive review and understanding of how GNNs could improve DB systems.  \r\nTherefore, this survey aims to bridge this gap by providing a structured and in-depth overview of GNNs for DB systems. Specifically, we propose a new taxonomy that classifies existing methods into two key categories: (1) Relational Databases, which includes tasks like performance prediction, query optimization, and Text-to-SQL, and (2) Graph Databases, addressing challenges like efficient graph query processing and graph similarity computation. We systematically review key methods in each category, highlighting their contributions and practical implications.  \r\nFinally, we suggest promising avenues for integrating GNNs into Database systems.",
        "location": "Guangzhou",
        "day": "August 31st",
        "hour": "14:45",
        "session": "ML: Graphs (3\/3)"
    },
    {
        "id": "8516",
        "title": "Zero-shot Quantization: A Comprehensive Survey",
        "authors": "Minjun Kim, Jaehyeon Choi, Jongkeun Lee, Wonjin Cho, U Kang",
        "abstract": "Network quantization has proven to be a powerful approach to reduce the memory and computational demands of deep learning models for deployment on resource-constrained devices.\r\nHowever, traditional quantization methods often rely on access to training data, which is impractical in many real-world scenarios due to privacy, security, or regulatory constraints.\r\nZero-shot Quantization (ZSQ) emerges as a promising solution, achieving quantization without requiring any real data.\r\nIn this paper, we provide a comprehensive overview of ZSQ methods and their recent advancements.\r\nFirst, we provide a formal definition of the ZSQ problem and highlight the key challenges.\r\nThen, we categorize the existing ZSQ methods into classes based on data generation strategies, and analyze their motivations, core ideas, and key takeaways.\r\nLastly, we suggest future research directions to address the remaining limitations and advance the field of ZSQ.\r\nTo the best of our knowledge, this paper is the first in-depth survey on ZSQ.",
        "location": "Montreal",
        "day": "August 20th",
        "hour": "14:00",
        "session": "Computer vision (2\/3)",
        "poster_positions": "From board n1 to board n5"
    },
    {
        "id": "9009",
        "title": "A Comprehensive Survey on Physical Risk Control in the Era of Foundation Model-enabled Robotics",
        "authors": "Takeshi Kojima, Yaonan Zhu, Yusuke Iwasawa, Toshinori Kitamura, Gang Yan, Shu Morikuni, Ryosuke Takanami, Alfredo Solano, Tatsuya Matsushima, Akiko Murakami, Yutaka Matsuo",
        "abstract": "Recent Foundation Model-enabled robotics (FMRs) display greatly improved general-purpose skills, enabling more adaptable automation than conventional robotics. Their ability to handle diverse tasks thus creates new opportunities to replace human labor. However, unlike general foundation models, FMRs interact with the physical world, where their actions directly affect the safety of humans and surrounding objects, requiring careful deployment and control. Based on this proposition, our survey comprehensively summarizes robot control approaches to mitigate physical risks by covering the entire lifespan of FMRs, ranging from the pre-deployment to the post-incident stage. Specifically, we broadly divide the timeline into the following three phases: (1) pre-deployment phase, (2) pre-incident phase, and (3) post-incident phase. Throughout this survey, we find that there is much room to study (i) pre-incident risk mitigation strategies, (ii) research that assumes physical interaction with humans, and (iii) essential issues of foundation models themselves. We hope that this survey will be a milestone in providing a high-resolution analysis of the physical risks of FMRs and their control, contributing to the realization of a good human-robot relationship.",
        "location": "Montreal",
        "day": "August 20th",
        "hour": "14:00",
        "session": "Robotics",
        "poster_positions": "From board n72 to board n76"
    },
    {
        "id": "8323",
        "title": "Towards Cross-Modality Modeling for Time Series Analytics: A Survey in the LLM Era",
        "authors": "Chenxi Liu, Shaowen Zhou, Qianxiong Xu, Hao Miao, Cheng Long, Ziyue Li, Rui Zhao",
        "abstract": "The proliferation of edge devices has generated an unprecedented volume of time series data across different domains, motivating a variety of well-customized methods. Recently, Large Language Models (LLMs) have emerged as a new paradigm for time series analytics by leveraging the shared sequential nature of textual data and time series. However, a fundamental cross-modality gap between time series and LLMs exists, as LLMs are pre-trained on textual corpora and are not inherently optimized for time series. Many recent proposals are designed to address this issue. In this survey, we provide an up-to-date overview of LLMs-based cross-modality modeling for time series analytics. We first introduce a taxonomy that classifies existing approaches into four groups based on the type of textual data employed for time series modeling. We then summarize key cross-modality strategies, e.g., alignment and fusion, and discuss their applications across a range of downstream tasks. Furthermore, we conduct experiments on multimodal datasets from different application domains to investigate effective combinations of textual data and cross-modality strategies for enhancing time series analytics. Finally, we suggest several promising directions for future research. This survey is designed for a range of professionals, researchers, and practitioners interested in LLM-based time series modeling.",
        "location": "Guangzhou",
        "day": "August 30th",
        "hour": "09:40",
        "session": "DM: Mining temporal data"
    },
    {
        "id": "1106",
        "title": "A Survey on One-To-Many Negotiation: A Taxonomy of Interdependency",
        "authors": "Tamara C.P. Florijn, Pınar Yolum, Tim Baarslag",
        "abstract": "One-to-many negotiations are widely applied in various domains, contributing to efficient resource allocation and effective decision making. This wide variety of applications also brings a wide variety of implemented protocols, terminology and utility functions, which makes it hard to compare and improve strategies using existing solutions.  \r\n    We introduce a meta-model of negotiations, which characterizes almost all one-to-many negotiation research, bringing a unified description of the negotiations. This meta-model allows us to identify different classes of interdependency based on utility functions.\r\n    We show how existing one-to-many negotiations are related to each other, finding new insights and identifying knowledge gaps. \r\n     We suggest that a general utility function framework and benchmark scenarios for one-to-many negotiations could accommodate future advancement in this field.",
        "location": "Montreal",
        "day": "August 20th",
        "hour": "14:00",
        "session": "Agent-based and Multi-agent Systems (2\/3)",
        "poster_positions": "From board n65 to board n71"
    },
    {
        "id": "8965",
        "title": "Integrating Neurosymbolic AI in Advanced Air Mobility: A Comprehensive Survey",
        "authors": "Kamal Acharya, Iman Sharifi, Mehul Lad, Liang Sun, Houbing Song",
        "abstract": "Neurosymbolic AI combines neural network adaptability with symbolic reasoning, promising an approach to address the complex regulatory, operational, and safety challenges in Advanced Air Mobility (AAM). This survey reviews its applications across key AAM domains such as demand forecasting, aircraft design, and real-time air traffic management. Our analysis reveals a fragmented research landscape where methodologies, including Neurosymbolic Reinforcement Learning, have shown potential for dynamic optimization but still face hurdles in scalability, robustness, and compliance with aviation standards. We classify current advancements, present relevant case studies, and outline future research directions aimed at integrating these approaches into reliable, transparent AAM systems. By linking advanced AI techniques with AAM’s operational demands, this work provides a concise roadmap for researchers and practitioners developing next-generation air mobility solutions.",
        "location": "Montreal",
        "day": "August 21st",
        "hour": "11:30",
        "session": "ML: Neurosymbolic AI",
        "poster_positions": "From board n22 to board n26"
    },
    {
        "id": "8585",
        "title": "A Survey on Bandit Learning in Matching Markets",
        "authors": "Shuai Li, Zilong Wang, Fang Kong",
        "abstract": "The two-sided matching market problem has attracted extensive research in both computer science and economics due to its wide-ranging applications in multiple fields. In various online matching platforms, market participants often have unclear preferences. As a result, a growing area of research focuses on the online scenario. Here, one-side participants (players) gradually figure out their unknown preferences through multiple rounds of interactions with the other-side participants (arms). This survey comprehensively reviews and systematically organizes the abundant literature on bandit learning in matching markets. It covers not only existing theoretical achievements but also various other related aspects. Based on the current research, several distinct directions for future study have emerged. We are convinced that delving deeper into these directions could potentially yield theoretical algorithms that are more suitable for real-world situations.",
        "location": "Guangzhou",
        "day": "August 30th",
        "hour": "11:00",
        "session": "Machine Learning (1\/3)"
    },
    {
        "id": "8291",
        "title": "An Empirical Study of Federated Prompt Learning for Vision Language Model",
        "authors": "Zhihao Wang, Wenke Huang, Tian Chen, Zekun Shi, Guancheng Wan, Yu Qiao, Bin Yang, Jian Wang, Bing Li, Mang Ye",
        "abstract": "The Vision Language Model (VLM) excels in aligning vision and language representations, and prompt learning has emerged as a key technique for adapting such models to downstream tasks. However, the application of prompt learning with VLM in federated learning (FL) scenarios remains underexplored. This paper systematically investigates the behavioral differences between language prompt learning (LPT) and vision prompt learning (VPT) under data heterogeneity challenges, including label skew and domain shift. We conduct extensive experiments to evaluate the impact of various FL and prompt configurations, such as client scale, aggregation strategies, and prompt length, to assess the robustness of Federated Prompt Learning (FPL). Furthermore, we explore strategies for enhancing prompt learning in complex scenarios where label skew and domain shift coexist, including leveraging both prompt types when computational resources allow. Our findings offer practical insights into optimizing prompt learning in federated settings, contributing to the broader deployment of VLMs in privacy-preserving environments.",
        "location": "Guangzhou",
        "day": "August 31st",
        "hour": "14:45",
        "session": "ML: Federated learning (2\/2)"
    },
    {
        "id": "8954",
        "title": "Paradigms of AI Evaluation: Mapping Goals, Methodologies and Culture",
        "authors": "John Burden, Marko Tešić, Lorenzo Pacchiardi, José Hernández-Orallo",
        "abstract": "Research in AI evaluation has grown increasingly complex and multidisciplinary, attracting researchers with diverse backgrounds and objectives. As a result, divergent evaluation paradigms have emerged, often developing in isolation, adopting conflicting terminologies, and overlooking each other's contributions. This fragmentation has led to insular research trajectories and communication barriers both among different paradigms and with the general public, contributing to unmet expectations for deployed AI systems. To help bridge this insularity, in this paper we survey recent work in the AI evaluation landscape and identify six main paradigms. We characterise major recent contributions within each paradigm across key dimensions related to their goals, methodologies and research cultures. By clarifying the unique combination of questions and approaches associated with each paradigm, we aim to increase awareness of the breadth of current evaluation approaches and foster cross-pollination between different paradigms. We also identify potential gaps in the field to inspire future research directions.",
        "location": "Montreal",
        "day": "August 19th",
        "hour": "15:00",
        "session": "Machine Learning (1\/4)",
        "poster_positions": "From board n47 to board n53"
    },
    {
        "id": "8621",
        "title": "Generative Multi-Agent Collaboration in Embodied AI: A Systematic Review",
        "authors": "Di Wu, Xian Wei, Guang Chen, Hao Shen, Bo Jin",
        "abstract": "Embodied multi-agent systems (EMAS) have attracted growing attention for their potential to address complex, real-world challenges in areas such as logistics and robotics. Recent advances in foundation models pave the way for generative agents capable of richer communication and adaptive problem-solving. This survey provides a systematic examination of how EMAS can benefit from these generative capabilities. We propose a taxonomy that categorizes EMAS by system architectures and embodiment modalities, emphasizing how collaboration spans both physical and virtual contexts. Central building blocks, perception, planning, communication, and feedback, are then analyzed to illustrate how generative techniques bolster system robustness and flexibility. Through concrete examples, we demonstrate the transformative effects of integrating foundation models into embodied, multi-agent frameworks. Finally, we discuss challenges and future directions, underlining the significant promise of EMAS to reshape the landscape of AI-driven collaboration.",
        "location": "Guangzhou",
        "day": "August 30th",
        "hour": "11:00",
        "session": "Agent-based and Multi-agent Systems"
    }
]