[
    {
        "id": "8668",
        "title": "MolHFCNet: Enhancing Molecular Graph Representations with Hierarchical Feature Combining and Hybrid Pretraining",
        "authors": "Duy-Long Nguyen, Duc-Luong Ho-Viet, Anh-Thu Ngo-Tran, Quang H. Nguyen, Binh P. Nguyen",
        "abstract": "Efficient molecular property prediction is crucial in bioinformatics and cheminformatics, with applications in drug discovery, materials science, and chemical engineering. This paper introduces MolHFCNet, a graph neural network designed to enhance molecular representation learning. At its core, the n-Hierarchical Features Combining (n-HFC) module aggregates information across multiple hierarchical feature spaces, effectively capturing both local and global graph structures. Unlike conventional models, n-HFC maintains computational complexity comparable to a single full-dimensional graph layer while supporting either 2D or 3D molecular graphs, ensuring flexibility across tasks. Furthermore, we propose a novel graph pretraining strategy that integrates predictive and contrastive learning, enabling the model to capture local chemical interactions and global molecular contexts for robust embeddings. Experimental results on benchmark datasets demonstrate MolHFCNet’s superior accuracy and efficiency compared to state-of-the-art methods, highlighting the potential of high-order hierarchical feature learning for advancing molecular graph analysis. Our code is available at https:\/\/github.com\/ndlongvn\/MolHFCNet.",
        "location": "Montreal",
        "day": "August 21st",
        "hour": "15:00",
        "session": "AI4Tech (2\/3)",
        "poster_positions": "From board n124 to board n130"
    },
    {
        "id": "3345",
        "title": "TCCD: Tree-guided Continuous Causal Discovery via Collaborative MCTS-Parameter Optimization",
        "authors": "Jingjin Liu, Yingkai Xiao, Hankz Hankui Zhuo, Wushao Wen",
        "abstract": "Learning causal relationships in directed acyclic graphs (DAGs) from multi-type event sequences is a challenging task, especially in large-scale telecommunication networks. Existing methods struggle with the exponentially growing search space and lack global exploration. Gradient-based approaches are limited by their reliance on local information and often fail to generalize. To address these issues, we propose TCCD, a framework that combines Monte Carlo Tree Search (MCTS) with continuous gradient optimization. TCCD balances global exploration and local optimization, overcoming the shortcomings of purely gradient-based methods and enhancing generalization. By unifying various causal structure learning approaches, TCCD offers a scalable and efficient solution for causal inference in complex networks. Extensive experiments validate its superior performance on both synthetic and real-world datasets. Code and Appendix are available at https:\/\/github.com\/jzephyrl\/TCCD.",
        "location": "Guangzhou",
        "day": "August 31st",
        "hour": "14:45",
        "session": "AI4Tech: AI Enabling Technologies (2\/2)"
    },
    {
        "id": "2938",
        "title": "Unified Molecule-Text Language Model with Discrete Token Representation",
        "authors": "Shuhan Guo, Yatao Bian, Ruibing Wang, Nan Yin, Zhen Wang, Quanming Yao",
        "abstract": "The remarkable success of Large Language Models (LLMs) across diverse tasks has driven the research community to extend their capabilities to molecular applications. However, most molecular LLMs employ adapter-based architectures that fail to equally integrate molecule and text modalities and lack explicit supervision signals for the molecular modality. To address these issues, we introduce UniMoT, a Unified Molecule-Text LLM adopting a tokenizer-based architecture that expands the vocabulary of LLMs with molecule tokens. Specifically, we introduce a Vector Quantization-driven tokenizer that incorporates a Q-Former to bridge the modality gap between molecule and text. This tokenizer transforms molecular structures into sequences of  tokens exhibiting causal dependency, thereby encapsulating both  high-level molecular features and textual information. Equipped with this tokenizer, UniMoT  unifies molecule and text modalities under a shared token representation and an autoregressive training paradigm. This enables the model to process molecular structures as a distinct linguistic system and generate them in textual form. Through a four-stage training scheme, UniMoT functions as a multi-modal generalist capable of performing both molecule-to-text and text-to-molecule tasks. Extensive experiments demonstrate that UniMoT achieves state-of-the-art performance across a wide range of molecule comprehension and generation tasks.",
        "location": "Guangzhou",
        "day": "August 30th",
        "hour": "11:00",
        "session": "AI4Tech: AI Enabling Technologies (1\/2)"
    },
    {
        "id": "8382",
        "title": "COLUR: Confidence-Oriented Learning, Unlearning and Relearning with Noisy-Label Data for Model Restoration and Refinement",
        "authors": "Zhihao Sui, Liang Hu, Jian Cao, Usman Naseem, Zhongyuan Lai, Qi Zhang",
        "abstract": "Large deep learning models have achieved significant success in various tasks. However, the performance of a model can significantly degrade if it is needed to train on datasets with noisy labels with misleading or ambiguous information. To date, there are limited investigations on how to restore performance when model degradation has been incurred by noisy label data. Inspired by the \"forgetting mechanism\" in neuroscience, which enables accelerating the relearning of correct knowledge by unlearning the wrong knowledge, we propose a robust model restoration and refinement (MRR) framework COLUR, namely Confidence-Oriented Learning, Unlearning and Relearning. Specifically, we implement COLUR with an efficient co-training architecture to unlearn the influence of label noise, and then refine model confidence on each label for relearning. Extensive experiments are conducted on four real datasets and all evaluation results show that COLUR consistently outperforms other SOTA methods after MRR.",
        "location": "Guangzhou",
        "day": "August 30th",
        "hour": "11:00",
        "session": "AI4Tech: AI Enabling Technologies (1\/2)"
    },
    {
        "id": "8631",
        "title": "SpeechHGT: A Multimodal Hypergraph Transformer for Speech-Based Early Alzheimer’s Disease Detection",
        "authors": "Shagufta Abid, Dongyu Zhang, Ahsan Shehzad, Jing Ren, Shuo Yu, Hongfei Lin, Feng Xia",
        "abstract": "Early detection of Alzheimer's disease (AD) through spontaneous speech analysis represents a promising, non-invasive diagnostic approach. Existing methods predominantly rely on fusion-based multimodal deep learning, effectively integrating linguistic and acoustic features. However, these methods inadequately model higher-order interactions between modalities, reducing diagnostic accuracy. To address this, we introduce SpeechHGT, a multimodal hypergraph transformer designed to capture and learn higher-order interactions in spontaneous speech features. SpeechHGT encodes multimodal features as hypergraphs, where nodes represent individual features and hyperedges represent grouped interactions. A novel hypergraph attention mechanism enables robust modeling of both pairwise and higher-order interactions. Experimental evaluations on the DementiaBank datasets reveal that SpeechHGT achieves state-of-the-art performance, surpassing baseline models in accuracy and F1 score. These results highlight the potential of hypergraph-based models to improve AI-driven diagnostic tools for early AD detection.",
        "location": "Guangzhou",
        "day": "August 31st",
        "hour": "14:45",
        "session": "AI4Tech: AI Enabling Technologies (2\/2)"
    },
    {
        "id": "2111",
        "title": "Revisiting Continual Ultra-fine-grained Visual Recognition with Pre-trained Models",
        "authors": "Pengcheng Zhang, Xiaohan Yu, Meiying Gu, Yuchen Wu, Yongsheng Gao, Xiao Bai",
        "abstract": "Continual ultra-fine-grained visual recognition (C-UFG) aims to continuously learn to categorize the increasing number of cultivars (VC-UFG) and consistently recognize crops across reproductive stages (HC-UFG), which is a fundamental goal of intelligent agriculture. Despite the progress made in general continual learning, C-UFG remains an underexplored issue. This work establishes the first comprehensive C-UFG benchmark using massive soy leaf data. By analyzing recent pre-trained model (PTM) based continual learning methods on the proposed benchmark, we propose two simple yet effective PTM-based methods to boost the performance of VC-UFG and HC-UFG, respectively. On top of those, we integrate the two methods into one unified framework and propose the first unified model, Unic, that is capable of tackling the C-UFG problem where VC-UFG and HC-UFG co-exist in a single continual learning sequence. To understand the effectiveness of the proposed methods, we first evaluate the models on VC-UFG and HC-UFG challenges and then test the proposed Unic on a unified C-UFG challenge. Experimental results demonstrate the proposed methods achieve superior performance for C-UFG. The code is available at https:\/\/github.com\/PatrickZad\/unicufg.",
        "location": "Guangzhou",
        "day": "August 31st",
        "hour": "14:45",
        "session": "AI4Tech: AI Enabling Technologies (2\/2)"
    },
    {
        "id": "8620",
        "title": "DL-KDD: Dual-Lightness Knowledge Distillation for Action Recognition in the Dark",
        "authors": "Chi-Jui Chang, Oscar Tai-Yuan Chen, Vincent S. Tseng",
        "abstract": "Human action recognition in dark videos is a challenging task for computer vision due to the low quality of the videos filmed in the dark. Recent studies focused on applying dark enhancement methods to improve the visibility of the video. However, such video processing results in the loss of critical information in the original (un-enhanced) video. Conversely, traditional two-stream methods are capable of learning information from both original and enhanced videos, but it can lead to a significant increase in the computational cost. To address these challenges, we propose a novel knowledge-distillation-based framework, named Dual-Lightness KnowleDge Distillation (DL-KDD), which simultaneously resolves the aforementioned issues by enabling a student model to obtain both original features and light-enhanced knowledge without additional complexity, thus improving the performance of the model and avoiding extra computational cost. Through comprehensive evaluations, the proposed DL-KDD, with only original video required as input during the inference phase, significantly outperforms state-of-the-art methods on the widely-used dark video datasets. The results highlight the excellence of our proposed knowledge-distillation-based framework for dark video human action recognition.",
        "location": "Montreal",
        "day": "August 21st",
        "hour": "15:00",
        "session": "AI4Tech (2\/3)",
        "poster_positions": "From board n124 to board n130"
    },
    {
        "id": "8604",
        "title": "The Graph’s Apprentice: Teaching an LLM Low-Level Knowledge for Circuit Quality Estimation",
        "authors": "Reza Moravej, Saurabh Bodhe, Zhanguang Zhang, Didier Chételat, Dimitrios Tsaras, Yingxue Zhang, Hui-Ling Zhen, Jianye Hao, Mingxuan Yuan",
        "abstract": "Logic synthesis is a crucial phase in the circuit design process, responsible for transforming hardware description language (HDL) designs into optimized netlists. However, traditional logic synthesis methods are computationally intensive, restricting their iterative use in refining chip designs. Recent advancements in large language models (LLMs), particularly those fine-tuned on programming languages, present a promising alternative. This work proposes augmenting LLMs with predictor networks trained to estimate circuit quality directly from HDL code. To enhance performance, the model is regularized using embeddings from graph neural networks (GNNs) trained on Look-Up Table (LUT) graphs, thereby incorporating lower-level circuit insights. The proposed method demonstrates superior performance compared to existing graph-based RTL-level estimation techniques on the established benchmark OpenABCD, while providing instant feedback on HDL code quality.",
        "location": "Montreal",
        "day": "August 20th",
        "hour": "10:00",
        "session": "AI4Tech (1\/3)",
        "poster_positions": "From board n93 to board n97"
    },
    {
        "id": "8887",
        "title": "DeepFeatIoT: Unifying Deep Learned, Randomized, and LLM Features for Enhanced IoT Time Series Sensor Data Classification in Smart Industries",
        "authors": "Muhammad Sakib Khan Inan, Kewen Liao",
        "abstract": "Internet of Things (IoT) sensors are ubiquitous technologies deployed across smart cities, industrial sites, and healthcare systems. They continuously generate time series data that enable advanced analytics and automation in industries. However, challenges such as the loss or ambiguity of sensor metadata, heterogeneity in data sources, varying sampling frequencies, inconsistent units of measurement, and irregular timestamps make raw IoT time series data difficult to interpret, undermining the effectiveness of smart systems. To address these challenges, we propose a novel deep learning model, DeepFeatIoT, which integrates learned local and global features with non-learned randomized convolutional kernel-based features and features from large language models (LLMs). This straightforward yet unique fusion of diverse learned and non-learned features significantly enhances IoT time series sensor data classification, even in scenarios with limited labeled data. Our model's effectiveness is demonstrated through its consistent and generalized performance across multiple real-world IoT sensor datasets from diverse critical application domains, outperforming state-of-the-art benchmark models. These results highlight DeepFeatIoT's potential to drive significant advancements in IoT analytics and support the development of next-generation smart systems.",
        "location": "Montreal",
        "day": "August 21st",
        "hour": "15:00",
        "session": "AI4Tech (2\/3)",
        "poster_positions": "From board n124 to board n130"
    },
    {
        "id": "9061",
        "title": "Physics-based Generative Models for Geometrically Consistent and Interpretable Wireless Channel Synthesis",
        "authors": "Satyavrat Wagle, Akshay Malhotra, Shahab Hamidi-Rad, Aditya Sant, David J. Love, Christopher G. Brinton",
        "abstract": "In recent years, machine learning (ML) methods have become increasingly popular in wireless communication systems for several applications. A critical bottleneck for designing ML systems for wireless communications is the availability of realistic wireless channel datasets, which are extremely resource-intensive to produce. To this end, the generation of realistic wireless channels plays a key role in the subsequent design of effective ML algorithms for wireless communication systems. Generative models have been proposed to synthesize channel matrices, but outputs produced by such methods may not correspond to geometrically viable channels and do not provide any insight into the scenario being generated. In this work, we aim to address both these issues by integrating established parametric, physics-based geometric channel (PPGC) modeling frameworks with generative methods to produce realistic channel matrices with interpretable representations in the parameter domain. We show that the generative model converges to prohibitively suboptimal stationary points when learning the underlying prior directly over the parameters due to the non-convex PPGC model. To address this limitation, we propose a linearized reformulation of the problem to ensure smooth gradient flow during generative model training, while also providing insights into the underlying physical environment. We evaluate our model against prior baselines by comparing the generated, scenario-specific samples in terms of the 2-Wasserstein distance and through its utility when used for downstream compression tasks.",
        "location": "Montreal",
        "day": "August 20th",
        "hour": "10:00",
        "session": "AI4Tech (1\/3)",
        "poster_positions": "From board n93 to board n97"
    },
    {
        "id": "8622",
        "title": "DeCo: Defect-Aware Modeling with Contrasting Matching for Optimizing Task Assignment in Online IC Testing",
        "authors": "Lo Pang-Yun Ting, Yu-Hao Chiang, Yi-Tung Tsai, Hsu-Chao Lai, Kun-Ta Chuang",
        "abstract": "In the semiconductor industry, integrated circuit (IC) processes play a vital role, as the rising complexity and market expectations necessitate improvements in yield. Identifying IC defects and assigning IC testing tasks to the right engineers improves efficiency and reduces losses. While current studies emphasize fault localization or defect classification, they overlook the integration of defect characteristics, historical failures, and the insights from engineer expertise, which restrains their effectiveness in improving IC handling.  To leverage AI for these challenges, we propose DeCo, an innovative approach for optimizing task assignment in IC testing. DeCo constructs a novel defect-aware graph from IC testing reports, capturing co-failure relationships to enhance defect differentiation, even with scarce defect data. Additionally, it formulates defect-aware representations for engineers and tasks, reinforced by local and global structure modeling on the defect-aware graph. Finally, a contrasting-based assignment mechanism pairs testing tasks with QA engineers by considering their skill level and current workload, thus promoting an equitable and efficient job dispatch. Experiments on a real-world dataset demonstrate that DeCo achieves the highest task-handling success rates in different scenarios, exceeding 80%, while also maintaining balanced workloads on both scarce or expanded defect data. Moreover, case studies reveal that DeCo can assign tasks to potentially capable engineers, even for their unfamiliar defects, highlighting its potential as an AI-driven solution for the real-world IC failure analysis and task handling.",
        "location": "Montreal",
        "day": "August 21st",
        "hour": "15:00",
        "session": "AI4Tech (2\/3)",
        "poster_positions": "From board n124 to board n130"
    },
    {
        "id": "8737",
        "title": "Generative Co-Design of Antibody Sequences and Structures via Black-Box Guidance in a Shared Latent Space",
        "authors": "Yinghua Yao, Yuangang Pan, Xixian Chen",
        "abstract": "Advancements in deep generative models have enabled the joint modeling of antibody sequence and structure, given the antigen-antibody complex as context. However, existing approaches for optimizing complementarity-determining regions (CDRs) to improve developability properties operate in the raw data space, leading to excessively costly evaluations due to the inefficient search process. To address this, we propose LatEnt blAck-box Design (LEAD), a sequence-structure co-design framework that optimizes both sequence and structure within their shared latent space. Optimizing shared latent codes can not only break through the limitations of existing methods, but also ensure synchronization of different modality designs. Particularly, we design a black-box guidance strategy to accommodate real-world scenarios where many property evaluators are non-differentiable. Experimental results demonstrate that our LEAD achieves superior optimization performance for both single and multi-property objectives. Notably, LEAD reduces query consumption by a half while surpassing baseline methods in property optimization. The code is available at https:\/\/github.com\/EvaFlower\/LatEnt-blAck-box-Design.",
        "location": "Guangzhou",
        "day": "August 31st",
        "hour": "14:45",
        "session": "AI4Tech: AI Enabling Technologies (2\/2)"
    },
    {
        "id": "8717",
        "title": "SpaceDet: A Large-scale Space-based Image Dataset and RSO Detection for Space Situational Awareness",
        "authors": "Jiaping Xiao, Rangya Zhang, Yuhang Zhang, Lu Bai, Qianlei Jia, Mir Feroskhan",
        "abstract": "Space situational awareness (SSA) plays an imperative role in maintaining safe space operations, especially given the increasingly congested space traffic around the Earth. Space-based SSA offers a flexible and lightweight solution compared to traditional ground-based SSA. With advanced machine learning approaches, space-based SSA can extract features from high-resolution images in space to detect and track resident space objects (RSOs). However, existing spacecraft image datasets, such as SPARK, fall short of providing realistic camera observations, rendering the derived algorithms unsuitable for real SSA systems. In this work, we introduce SpaceDet, a large-scale realistic space-based image dataset for SSA. We consider accurate space orbit dynamics and a physical camera model with various noise distributions, generating images at the photon level. To extend the available observation window, four overlapping cameras are simulated with a fixed rotation angle. SpaceDet includes images of RSOs observed from 19 km to 63,000 km, captured by a tracker operating in LEO, MEO, and GEO orbits over a period of 5,000 seconds. Each image has a resolution of 4418 x 4418 pixels, providing detailed features for developing advanced SSA approaches. We split the dataset into three subsets: SpaceDet-100, SpaceDet-5000, and SpaceDet-full, catering to various image processing applications. The SpaceDet-full corpus includes a comprehensive dataloader with 781.5 GB of images and 25.9 MB of ground truth labels. Furthermore, we adapted detection and tracking algorithms on the collected dataset using a specified splitting method to accelerate the training process. The trained model can detect RSOs from real-world space observations with zero-shot capability.",
        "location": "Guangzhou",
        "day": "August 31st",
        "hour": "14:45",
        "session": "AI4Tech: AI Enabling Technologies (2\/2)"
    },
    {
        "id": "8751",
        "title": "Hallucination Reduction in Video-Language Models via Hierarchical Multimodal Consistency",
        "authors": "Jisheng Dang, Shengjun Deng, Haochen Chang, Teng Wang, Bimei Wang, Shude Wang, Nannan Zhu, Guo Niu, Jingwen Zhao, Jizhao Liu",
        "abstract": "The rapid advancement of large language models (LLMs) has led to the widespread adoption of video-language models (VLMs) across various domains. However, VLMs are often hindered by their limited semantic discrimination capability, exacerbated by the limited diversity and biased sample distribution of most video-language datasets. This limitation results in a biased understanding of the semantics between visual concepts, leading to hallucinations. To address this challenge, we propose a Multi-level Multimodal Alignment (MMA) framework that leverages a text encoder and semantic discriminative loss to achieve multi-level alignment. This enables the model to capture both low-level and high-level semantic relationships, thereby reducing hallucinations. By incorporating language-level alignment into the training process, our approach ensures stronger semantic consistency between video and textual modalities. Furthermore, we introduce a two-stage progressive training strategy that exploits larger and more diverse datasets to enhance semantic alignment and better capture general semantic relationships between visual and textual modalities. Our comprehensive experiments demonstrate that the proposed MMA method significantly mitigates hallucinations and achieves state-of-the-art performance across multiple video-language tasks, establishing a new benchmark in the field.",
        "location": "Guangzhou",
        "day": "August 31st",
        "hour": "14:45",
        "session": "AI4Tech: AI Enabling Technologies (2\/2)"
    },
    {
        "id": "2124",
        "title": "Transformer-based Reinforcement Learning for Net Ordering in Detailed Routing",
        "authors": "Zhanwen Zhou, Hankz Hankui Zhuo, Jinghua Zhou, Wushao Wen",
        "abstract": "With feature size shrinking and design complexity increasing, detailed routing has become a crucial challenge in VLSI design. Although detailed routers have been proposed to judiciously handle hard-to-access pins and various design rules, their performances are sensitive to the order of nets to be routed, especially for those sequential routers with ripup-and-reroute scheme. In the published literature, net ordering strategies mainly rely on experts' knowledge to design heuristics to guarantee their performances. In this paper, we propose a novel transformer-based reinforcement learning framework for net ordering in detailed routing, aiming at automatically gaining failure\/success routing experiences and building net order policies to guide detailed routing. Our experimental results show that our framework can effectively reduce the number of design rule violations and routing cost with comparable wirelength and via count, with comparison to state-of-the-art approaches.",
        "location": "Guangzhou",
        "day": "August 31st",
        "hour": "14:45",
        "session": "AI4Tech: AI Enabling Technologies (2\/2)"
    },
    {
        "id": "8298",
        "title": "RF-DTR: A Multi-Stage DCT Token Regression Network for Progressive Rib Fracture Mask Refinement",
        "authors": "ShouYu Chen, Liang Hu, JunTao Wang, Usman Naseem, Zhongyuan Lai, Qi Zhang",
        "abstract": "Rib fracture patterns are key indicators of trauma severity. Detecting and locating these fractures is a critical yet time-consuming task, especially in 3D imaging, due to their minute size and irregular geometries. Existing voxel-based spatial methods fail to capture frequency-domain variations inherent in imaging and do not replicate the progressive refinement process used by clinicians during manual annotation, leading to suboptimal results. We propose a novel regression network, RF-DTR, incorporating a gated regressor mechanism and operating entirely in the frequency domain to address these challenges. Specifically, we present an innovative spatial-frequency transform applied to volumes and corresponding masks. Furthermore, we introduce a Mahalanobis regularization technique to enhance the model and learn high-frequency DCT components relevant to clinical tasks. Finally, a hierarchical penalty is proposed to improve the confidence of the prediction. Extensive experiments confirm our method's superiority in handling complex, sparsely annotated medical imaging datasets.",
        "location": "Guangzhou",
        "day": "August 31st",
        "hour": "14:45",
        "session": "AI4Tech: AI Enabling Technologies (2\/2)"
    },
    {
        "id": "8845",
        "title": "Empowering Quantum Serverless Circuit Deployment Optimization via Graph Contrastive Learning and Learning-to-Rank Co-designed Approaches",
        "authors": "Tingting Li, Ziming Zhao, Jianwei Yin",
        "abstract": "With the rapid advancements in quantum computing, cloud-based quantum services have gained increasing prominence. However, due to quantum noise, optimizing the deployment of quantum circuits remains an NP-hard problem with an expansive search space. Existing methods usually use heuristic algorithms to approximate the solution, such as the representative IBM Qiskit. On the one hand, they often find suboptimal deployment solutions. On the other hand, prior technologies do not consider user-specific requirements and can only provide a single deployment strategy. In this paper, we propose QCDeploy that can provide a ranked list of effective deployment strategies to optimize quantum serverless circuit deployment. Specifically, we model quantum circuits as Directed Acyclic Graph (DAG) representations and utilize graph contrastive learning for vector embedding. Then, a tailored list-aware learning-to-rank architecture is employed to generate a list of candidate strategies (prioritizing better strategies). We conduct extensive evaluations involving 45 prevalent quantum algorithm circuits across 3~5 qubits, utilizing 3 IBM quantum physical devices with three types of chip topologies. The results demonstrate that our proposed framework significantly outperforms IBMQ's default deployment scheme, e.g., achieving 17.95% overhead reduction and increasing the execution success rate by 20%~40%.",
        "location": "Guangzhou",
        "day": "August 30th",
        "hour": "11:00",
        "session": "AI4Tech: AI Enabling Technologies (1\/2)"
    },
    {
        "id": "8333",
        "title": "Optimize Battery Control: A Multi-Objective Evolutionary Ensemble Reinforcement Learning Approach",
        "authors": "Jingwei Hu, Kai Xie, Zheng Fang, Xiaodong Li, Junchi Yan, Zhihong Zhang",
        "abstract": "The Dynamically Reconfigurable Battery (DRB) systems, which use high-speed power electronic switches to dynamically adjust battery interconnections in real-time, are critical to the performance of the battery pack. Traditional battery management strategies often fail to address multi-objective optimization, leading to imbalanced performance and inadequate energy utilization. To enhance decision-making across multiple objectives, an Evolutionary Ensemble Reinforcement Learning (EERL) framework is proposed in this paper. This framework incorporates evolutionary algorithms to associate ensemble learning, thus improving reinforcement learning (RL) performance. It decomposes a complex objective into multiple sub-objectives, each optimized independently, while incorporating diverse performance metrics into the correlation stage to derive the Pareto optimal solution. The EERL can efficiently mitigate potential adverse effects such as short circuits, disconnections, and reverse charging, thereby effectively reducing capacity differences among various batteries. Simulations and real-world testing demonstrate that the proposed approach overcomes the issue of local optima entrapment in multi-objective optimization scenarios. In a real-world system, an 11.08 % increase in energy efficiency is observed compared to existing approaches.",
        "location": "Guangzhou",
        "day": "August 30th",
        "hour": "11:00",
        "session": "AI4Tech: AI Enabling Technologies (1\/2)"
    },
    {
        "id": "8587",
        "title": "FLARE: A Framework for Stellar Flare Forecasting Using Stellar Physical Properties and Historical Records",
        "authors": "Bingke Zhu, Xiaoxiao Wang, Minghui Jia, Yihan Tao, Xiao Kong, Ali Luo, Yingying Chen, Ming Tang, Jinqiao Wang",
        "abstract": "Stellar flare events are critical observational samples for astronomical research; however, recorded flare events remain limited. Stellar flare forecasting can provide additional flare event samples to support research efforts. Despite this potential, no specialized models for stellar flare forecasting have been proposed to date. In this paper, we present extensive experimental evidence demonstrating that both stellar physical properties and historical flare records are valuable inputs for flare forecasting tasks. We then introduce FLARE (Forecasting Light-curve-based Astronomical Records via features Ensemble), the first-of-its-kind large model specifically designed for stellar flare forecasting. FLARE integrates stellar physical properties and historical flare records through a novel Soft Prompt Module and Residual Record Fusion Module. Experiments on the Kepler light curve dataset demonstrate that FLARE achieves superior performance compared to other methods across all evaluation metrics. Finally, we validate the forecast capability of our model through a comprehensive case study.",
        "location": "Guangzhou",
        "day": "August 30th",
        "hour": "11:00",
        "session": "AI4Tech: AI Enabling Technologies (1\/2)"
    },
    {
        "id": "8310",
        "title": "Rethinking Remaining Useful Life Prediction with Scarce Time Series Data: Regression Under Indirect Supervision",
        "authors": "Jiaxiang Cheng, Yipeng Pang, Guoqiang Hu",
        "abstract": "Supervised time series prediction relies on directly measured target variables, but real-world use cases such as predicting remaining useful life (RUL) involve indirect supervision, where the target variable is labeled as a function of another dependent variable. Trending temporal regression techniques rely on sequential time series inputs to capture temporal patterns, requiring interpolation when dealing with sparsely and irregularly sampled covariates along the timeline. However, interpolation can introduce significant biases, particularly with highly scarce data. In this paper, we address the RUL prediction problem with data scarcity as time series regression under indirect supervision. We introduce a unified framework called parameterized static regression, which takes single data points as inputs for regression of target values, inherently handling data scarcity without requiring interpolation. The time dependency under indirect supervision is captured via a parametrical rectification (PR) process, approximating a parametric function during inference with historical posteriori estimates, following the same underlying distribution used for labeling during training. Additionally, we propose a novel batch training technique for tasks in indirect supervision to prevent overfitting and enhance efficiency. We evaluate our model on public benchmarks for RUL prediction with simulated data scarcity. Our method demonstrates competitive performance in prediction accuracy when dealing with highly scarce time series data.",
        "location": "Guangzhou",
        "day": "August 30th",
        "hour": "11:00",
        "session": "AI4Tech: AI Enabling Technologies (1\/2)"
    },
    {
        "id": "8383",
        "title": "AI4TRT: Automatic Simulation of Teeth Restoration Treatment",
        "authors": "Feihong Shen, Yuer Ye",
        "abstract": "Visualizing restoration treatments is a crucial task in dentistry. Traditionally, dentists drag the standard template tooth line onto the inner image from the front view to simulate the outcome of the restoration. This process lacks the precision needed for patient presentation. We find that calculating the camera pose and the relative positions of the upper and lower jaws can enhance visualization accuracy and efficiency while assisting dentists in treatment design. In this work, we leverage the optical flow model and a customized point renderer to help dentists show the treatment outcome to the patient. Specifically, we take the 3D scan model and the intraoral image pair as input. Our framework automatically outputs the camera pose and the relative position of the upper and lower jaws. With these parameters, dentists can directly design the restoration treatment on the 3D scan model without caring about the 2D visualization. Then the designed tooth line and other simulation modalities can be rendered on the intraoral image with our customized renderer. Our framework relieves the labor of dentists and shows the case precisely.",
        "location": "Guangzhou",
        "day": "August 31st",
        "hour": "14:45",
        "session": "AI4Tech: AI Enabling Technologies (2\/2)"
    },
    {
        "id": "9166",
        "title": "Generating Grounded Responses to Counter Misinformation via Learning Efficient Fine-Grained Critiques",
        "authors": "Xiaofei Xu, Xiuzhen Zhang, Ke Deng",
        "abstract": "Fake news and misinformation pose a significant threat to society, making efficient mitigation essential. However, manual fact-checking is costly and lacks scalability. Large Language Models (LLMs) offer promise in automating counter-response generation to mitigate misinformation, but a critical challenge lies in their tendency to hallucinate non-factual information. Existing models mainly rely on LLM self-feedback to reduce hallucination, but this approach is computationally expensive. In this paper, we propose MisMitiFact, Misinformation Mitigation grounded in Facts, an efficient framework for generating fact-grounded counter-responses at scale. MisMitiFact generates simple critique feedback to refine LLM outputs, ensuring responses are grounded in evidence. \r\nWe develop lightweight, fine-grained critique models trained on data sourced from readily available fact-checking sites to identify and correct errors in key elements such as numerals, entities, and topics in LLM generations. Experiments show that MisMitiFact generates counter-responses of comparable quality to LLMs' self-feedback while using significantly smaller critique models. Importantly, it achieves a ~5x increase in feedback generation throughput, making it highly suitable for cost-effective, large-scale misinformation mitigation. Code and additional results are available at https:\/\/github.com\/xxfwin\/MisMitiFact.",
        "location": "Montreal",
        "day": "August 22nd",
        "hour": "10:00",
        "session": "AI4Tech (3\/3)",
        "poster_positions": "From board n53 to board n56"
    },
    {
        "id": "8908",
        "title": "Multi-Hierarchical Fine-Grained Feature Mapping Driven by Feature Contributions for Molecular Odor Prediction",
        "authors": "Hongxin Xie, Jiande Sun, Fanfu Xue, Zifei Han, Shanshan Feng, Qi Chen",
        "abstract": "Molecular odor prediction involves using a molecule's structure to estimate its odor. While accurate prediction remains challenging, AI models can suggest potential odors. Existing methods, however, often rely on basic descriptors or handcrafted fingerprints, which lack expressive power and hinder effective learning. Furthermore, these methods suffer from severe class imbalance, limiting the training effectiveness of AI models. To address these challenges, we propose a Feature Contribution-driven Hierarchical Multi-Feature Mapping Network (HMFNet). Specifically, we introduce a fine-grained, Local Multi-Hierarchy Feature Extraction module (LMFE) that performs deep feature extraction at the atomic level, capturing detailed features crucial for odor prediction. To enhance the extraction of discriminative atomic features, we integrate a Harmonic Modulated Feature Mapping (HMFM). This module dynamically learns feature importance and frequency modulation, improving the model's capability to capture relevant patterns. Additionally, a Global Multi-Hierarchy Feature Extraction module (GMFE) is designed to learn global features from the molecular graph topology, enabling the model to fully leverage global information and enhance its discriminative power for odor prediction. To further mitigate the issue of class imbalance, we propose a Chemically-Informed Loss (CIL). Experimental results demonstrate that our approach significantly improves performance across various deep learning models, highlighting its potential to advance molecular structure representation and accelerate the development of AI-driven technologies.",
        "location": "Guangzhou",
        "day": "August 31st",
        "hour": "14:45",
        "session": "AI4Tech: AI Enabling Technologies (2\/2)"
    },
    {
        "id": "6758",
        "title": "CycSeq: Leveraging Cyclic Data Generation for Accurate Perturbation Prediction in Single-Cell RNA-Seq",
        "authors": "Yicheng Liu, Sai Wu, Tianyun Zhang, Chang Yao, Ning Shen",
        "abstract": "Understanding and predicting the effects of cellular perturbations using single-cell sequencing technology remains a critical and challenging problem in biotechnology. In this work, we introduce CycSeq, a deep learning framework that leverages cyclic data generation and recent advances in neural architectures to predict single-cell responses under specified perturbations across multiple cell lines, while also generating the corresponding single-cell expression profiles. Specifically, CycSeq addresses the challenge of learning heterogeneous perturbation responses from unpaired single-cell gene expression data by generating pseudo-pairs through cyclic data generation. Experimental results demonstrate that CycSeq outperforms existing methods in perturbation prediction tasks, as evaluated using computational metrics such as R-squared and MAE. Furthermore, CycSeq employs a unified architecture that integrates information from multiple cell lines, enabling robust predictions even for long-tail cell lines with limited training data. The source code is publicly available at https:\/\/github.com\/yczju\/cycseq.",
        "location": "Montreal",
        "day": "August 20th",
        "hour": "10:00",
        "session": "AI4Tech (1\/3)",
        "poster_positions": "From board n93 to board n97"
    },
    {
        "id": "9050",
        "title": "Enhancing Portfolio Optimization via Heuristic-Guided Inverse Reinforcement Learning with Multi-Objective Reward and Graph-based Policy Learning",
        "authors": "Wenyi Zhang, Renjun Jia, Yanhao Wang, Dawei Cheng, Minghao Zhao, Cen Chen",
        "abstract": "Portfolio optimization encounters persistent challenges in adapting to dynamic markets due to static assumptions and high-dimensional decision spaces. Although reinforcement learning (RL) has emerged as a potential solution, conventional reward engineering often fails to capture complex market dynamics. Recent advances in deep RL and graph neural networks have attempted to enhance market microstructure modeling. However, these methods still struggle with the systematic integration of financial knowledge. To address the above issues, we propose a novel heuristic-guided inverse reinforcement learning framework for portfolio optimization. Specifically, our framework provides an interpretable expert strategy generation mechanism that takes into account sector diversification and correlation constraints. Then, a multi-objective reward optimization method is adopted to adaptively strike a balance between returns and risks. Furthermore, it also utilizes heterogeneous graph policy learning with hierarchical attention mechanisms to explicitly model inter-stock relationships. Finally, we conduct extensive experiments on real-world financial market data to demonstrate that our framework outperforms several state-of-the-art deep learning and RL baselines in terms of risk-adjusted returns. We provide case studies to showcase the ability of our framework to balance return maximization and risk containment. Our code is publicly available at https:\/\/github.com\/ChloeWenyiZhang\/SmartFolio\/.",
        "location": "Guangzhou",
        "day": "August 30th",
        "hour": "11:00",
        "session": "AI4Tech: AI Enabling Technologies (1\/2)"
    },
    {
        "id": "9186",
        "title": "KGCL: Knowledge-Enhanced Graph Contrastive Learning for Retrosynthesis Prediction Based on Molecular Graph Editing",
        "authors": "Fengqin Yang, Dekui Zhao, Haoxuan Qiu, Yifei Li, Zhiguo Fu",
        "abstract": "Retrosynthesis, which predicts the reactants of a given target molecule, is an essential task for drug discovery. Retrosynthesis prediction based on molecular graph editing has garnered widespread attention due to excellent interpretability. Existing methods fail to effectively incorporate the chemical knowledge when learning molecular representations. To address this issue, we propose a Knowledge-enhanced Graph Contrastive Learning model (KGCL), which retrieves functional group embeddings from a chemical knowledge graph and integrates them into the atomic embeddings of the product molecule using an attention mechanism. Furthermore, we introduce a graph contrastive learning strategy that generates augmented samples using graph edits to improve the molecular graph encoder. Our proposed method outperforms the strong baseline method Graph2Edits by 1.6% and 3.2% in terms of the top-1 accuracy and top-1 round-trip accuracy on the USPTO-50K dataset, respectively, and also achieves a new state-of-the-art performance among semi-template-based methods on the USPTO-FULL dataset.",
        "location": "Guangzhou",
        "day": "August 31st",
        "hour": "14:45",
        "session": "AI4Tech: AI Enabling Technologies (2\/2)"
    },
    {
        "id": "8919",
        "title": "CogTwin: A Hybrid Cognitive Architecture Framework for Adaptable and Cognitive Digital Twins",
        "authors": "Sukanya Mandal, Noel E. O'Connor",
        "abstract": "Current Digital Twin (DT) technology lacks the cognitive capabilities needed for true autonomy and intelligent adaptation. This paper introduces CogTwin, a hybrid cognitive architecture framework for developing Cognitive Digital Twins (CDTs). CogTwin integrates a 50ms cognitive cycle inspired by human cognition, dual knowledge graphs (static Domain Knowledge Repository (DKR) and dynamic Internal Knowledge Graph (DIKG)), a hybrid attention mechanism, and self-healing capabilities. Combining symbolic, sub-symbolic, and neuro-symbolic AI, CogTwin enables real-time learning and decision-making. Simulated smart city scenarios, including traffic incident management and power outage response, demonstrate CogTwin’s potential. Preliminary performance evaluations of the pseudocode suggest feasibility of the target 50ms cycle. The architecture also incorporates explainable AI (XAI) for transparency and human-CogTwin collaboration. CogTwin contributes towards a unified theory of cognition for DTs, laying the groundwork for more sophisticated and autonomous CDTs.",
        "location": "Montreal",
        "day": "August 21st",
        "hour": "15:00",
        "session": "AI4Tech (2\/3)",
        "poster_positions": "From board n124 to board n130"
    },
    {
        "id": "9066",
        "title": "Optimizing the Battery-Swapping Problem in Urban E-Bike Systems with Reinforcement Learning",
        "authors": "Wenjing Li, Zhao Li, Xuanwu Liu, Ruihao Zhu, Zhenzhe Zheng, Fan Wu",
        "abstract": "E-bikes (EBs) are a key transportation mode in urban areas, especially for couriers of delivery platforms, but underdeveloped EB systems can hinder couriers' productivity due to limited battery capacity. Battery-swapping stations address this issue by enabling riders to exchange depleted batteries for fully charged ones. However, managing supply and demand (SnD) imbalances at these stations has become increasingly complex. To address this, we introduce a new approach that formulates the Battery-Swapping Problem (BSP) as a discrete-time Markov Decision Process (MDP) to capture the dynamics of SnD imbalances. Building on it, we propose a Wasserstein-enhanced Proximal Policy Optimization (W-PPO) algorithm, which integrates Wasserstein distance with reinforcement learning to improve the robustness against uncertainty in forecasting SnD. W-PPO provides a BSP-specific, accurate loss function that reflects reward variations between two policies under real-world simulation. The algorithm’s effectiveness is assessed using key metrics: Shared Battery Utilization Ratio (SBUR) and Battery Supply Ratio (BSR). Simulations on real-world datasets show that W-PPO achieves a 30.59% improvement in SBUR and a 16.09% increase in BSR, ensuring practical applicability. By optimizing battery utilization and improving EB delivery systems, this work highlights the potential of AI for creating efficient and sustainable urban transportation solutions.",
        "location": "Guangzhou",
        "day": "August 30th",
        "hour": "11:00",
        "session": "AI4Tech: AI Enabling Technologies (1\/2)"
    },
    {
        "id": "9070",
        "title": "HygMap: Representing All Types of Map Entities via Heterogeneous Hypergraph",
        "authors": "Yifan Yang, Jingyuan Wang, Xie Yu, Yibang Tang",
        "abstract": "Maps are crucial for various smart city applications as a core component of city geographic information systems (GIS). Developing effective Map Entity Representation Learning methods can extract semantic information for downstream tasks like crime rate prediction and land use classification, with significant application potential. A map comprises three entity types: land parcels, road segments, and points of interest. Most existing methods focus on a single entity type, losing inter-entity relationships and weakening representation effectiveness for real-world applications. Thus, jointly modelling and representing multiple map entity types is essential. However, designing a unified framework is challenging due to map data's unstructured, complex, and heterogeneous nature. We propose a novel method, HygMap, to represent all map entity types. We model the map as a heterogeneous hypergraph, design an encoder for map entities, and introduce a hybrid self-supervised training scheme. This architecture comprehensively captures the heterogeneous relationships among map entities at different levels. Experiments on nine downstream tasks with two real-world datasets show that our framework outperforms all baselines, with good computational efficiency and scalability.",
        "location": "Guangzhou",
        "day": "August 30th",
        "hour": "11:00",
        "session": "AI4Tech: AI Enabling Technologies (1\/2)"
    },
    {
        "id": "7774",
        "title": "Parameter-Efficient Fine-Tuning with Circulant and Diagonal Vectors",
        "authors": "Xinyu Ding, Lexuan Chen, Siyu Liao, Zhongfeng Wang",
        "abstract": "Foundation models have achieved tremendous success in different domains.\r\nHowever, their huge computation and storage complexity make these models difficult to fine-tune and also less applicable in practice. \r\nRecent study shows training in Fourier domain can be an effective fine-tuning method in terms of both model performance and number of training parameters. \r\nIn this work, we propose to further reduce the complexity by the factorization through the product of interleaved circulant and diagonal matrices. In addition, we address the case of non-square fine-tuning weights by partitioning the circulant matrix into blocks.\r\nOur method avoids the construction of weight change matrix and utilizes 1D fast Fourier transform (FFT) instead of 2D FFT. \r\nExperimental results show that our method achieves similar or better performance across various tasks with much less floating-point operations (FLOPs) and the number of trainable parameters.",
        "location": "Guangzhou",
        "day": "August 30th",
        "hour": "11:00",
        "session": "AI4Tech: AI Enabling Technologies (1\/2)"
    },
    {
        "id": "9165",
        "title": "SSPNet: Leveraging Robust Medication Recommendation with History and Knowledge",
        "authors": "Haodi Zhang, Jiawei Wen, Jiahong Li, Yuanfeng Song, Liang-Jie Zhang, Lin Ma",
        "abstract": "Automated medication recommendation is a crucial task within the domain of artificial intelligence in healthcare, where recommender systems are supposed to deliver precise, personalized drug combinations tailored to the evolving health states of patients. Existing approaches often treat clinical records (e.g., diagnoses, procedures) as isolated or unified entities, neglecting the inherent set-structured nature of medical data and the need to model interdependencies among clinical elements. To address the gap, we propose SSPNet, a novel end-to-end framework designed to process complete clinical record sets and directly generate optimal medication sets. SSPNet employs a set-based encoder to effectively capture and represent a patient's health condition from the electronic health records (EHRs), while a permutation-consistent decoder predicts the entire medication combination as a set. In addition, we introduce a novel personalized representation mechanism to capture the drugs previously used by individual patients. Extensive experiments on MIMIC-III and MIMIC-IV data sets reveal that SSPNet surpasses existing state-of-the-art methods in the accuracy of medication recommendations.",
        "location": "Guangzhou",
        "day": "August 31st",
        "hour": "14:45",
        "session": "AI4Tech: AI Enabling Technologies (2\/2)"
    },
    {
        "id": "8722",
        "title": "ImputeINR: Time Series Imputation via Implicit Neural Representations for Disease Diagnosis with Missing Data",
        "authors": "Mengxuan Li, Ke Liu, Jialong Guo, Jiajun Bu, Hongwei Wang, Haishuai Wang",
        "abstract": "Healthcare data frequently contain a substantial proportion of missing values, necessitating effective time series imputation to support downstream disease diagnosis tasks. However, existing imputation methods focus on discrete data points and are unable to effectively model sparse data, resulting in particularly poor performance for imputing substantial missing values. In this paper, we propose a novel approach, ImputeINR, for time series imputation by employing implicit neural representations (INR) to learn continuous functions for time series. ImputeINR leverages the merits of INR in that the continuous functions are not coupled to sampling frequency and have infinite sampling frequency, allowing ImputeINR to generate fine-grained imputations even on extremely sparse observed values. Extensive experiments conducted on eight datasets with five ratios of masked values show the superior imputation performance of ImputeINR, especially for high missing ratios in time series data. We also validate that applying ImputeINR to impute missing values in healthcare data enhances the performance of downstream disease diagnosis tasks.",
        "location": "Guangzhou",
        "day": "August 31st",
        "hour": "14:45",
        "session": "AI4Tech: AI Enabling Technologies (2\/2)"
    },
    {
        "id": "9108",
        "title": "Towards Comprehensive and Prerequisite-Free Explainer for Graph Neural Networks",
        "authors": "Han Zhang, Yan Wang, Guanfeng Liu, Pengfei Ding, Huaxiong Wang, Kwok-Yan Lam",
        "abstract": "To enhance the reliability and credibility of graph neural networks (GNNs) and improve the transparency of their decision logic, a new field of explainability of GNNs (XGNN) has emerged. However, two major limitations severely degrade the performance and hinder the generalizability of existing XGNN methods: they (a) fail to capture the complete decision logic of GNNs across diverse distributions in the entire dataset's sample space, and (b) impose strict prerequisites on edge properties and GNN internal accessibility. To address these limitations, we propose OPEN, a novel cOmprehensive and Prerequisite-free Explainer for GNNs. OPEN, as the first work in the literature, can infer and partition the entire dataset's sample space into multiple environments, each containing graphs that follow a distinct distribution. OPEN further learns the decision logic of GNNs across different distributions by sampling subgraphs from each environment and analyzing their predictions, thus eliminating the need for strict prerequisites. Experimental results demonstrate that OPEN captures nearly complete decision logic of GNNs, outperforms state-of-the-art methods in fidelity while maintaining similar efficiency, and enhances robustness in real-world scenarios.",
        "location": "Guangzhou",
        "day": "August 30th",
        "hour": "11:00",
        "session": "AI4Tech: AI Enabling Technologies (1\/2)"
    },
    {
        "id": "7596",
        "title": "Eye-See-You: Reverse Pass-Through VR and Head Avatars",
        "authors": "Ankan Dash, Jingyi Gu, Guiling Wang, Chen Chen",
        "abstract": "Virtual Reality (VR) headsets, while integral to the evolving digital ecosystem, present a critical challenge: the occlusion of users' eyes and portions of their faces, which hinders visual communication and may contribute to social isolation. To address this, we introduce RevAvatar, an innovative framework that leverages AI methodologies to enable reverse pass-through technology, fundamentally transforming VR headset design and interaction paradigms. RevAvatar integrates state-of-the-art generative models and multimodal AI techniques to reconstruct high-fidelity 2D facial images and generate accurate 3D head avatars from partially observed eye and lower-face regions. This framework represents a significant advancement in AI4Tech by enabling seamless interaction between virtual and physical environments, fostering immersive experiences such as VR meetings and social engagements. Additionally, we present VR-Face, a novel dataset comprising 200,000 samples designed to emulate diverse VR-specific conditions, including occlusions, lighting variations, and distortions. By addressing fundamental limitations in current VR systems, RevAvatar exemplifies the transformative synergy between AI and next-generation technologies, offering a robust platform for enhancing human connection and interaction in virtual environments.",
        "location": "Montreal",
        "day": "August 22nd",
        "hour": "10:00",
        "session": "AI4Tech (3\/3)",
        "poster_positions": "From board n53 to board n56"
    },
    {
        "id": "9047",
        "title": "NSF-MAP: Neurosymbolic Multimodal Fusion for Robust and Interpretable Anomaly Prediction in Assembly Pipelines",
        "authors": "Chathurangi Shyalika, Renjith Prasad, Fadi El Kalach, Revathy Venkataramanan, Ramtin Zand, Ramy Harik, Amit Sheth",
        "abstract": "In modern assembly pipelines, identifying anomalies is crucial in ensuring product quality and operational efficiency. Conventional single-modality methods fail to capture the intricate relationships required for precise anomaly prediction in complex predictive environments with abundant data and multiple modalities. This paper proposes a neurosymbolic AI and fusion-based approach for multimodal anomaly prediction in assembly pipelines. We introduce a time series and image-based fusion model that leverages decision-level fusion techniques. Our research builds upon three primary novel approaches in multimodal learning: time series and image-based decision-level fusion modeling, transfer learning for fusion, and knowledge-infused learning. We evaluate the novel method using our derived and publicly available multimodal dataset and conduct comprehensive ablation studies to assess the impact of our preprocessing techniques and fusion model compared to traditional baselines. The results demonstrate that a neurosymbolic AI-based fusion approach that uses transfer learning can effectively harness the complementary strengths of time series and image data, offering a robust and interpretable approach for anomaly prediction in assembly pipelines with enhanced performance. The datasets, codes to reproduce the results, supplementary materials, and demo are available at https:\/\/github.com\/ChathurangiShyalika\/NSF-MAP.",
        "location": "Montreal",
        "day": "August 21st",
        "hour": "15:00",
        "session": "AI4Tech (2\/3)",
        "poster_positions": "From board n124 to board n130"
    },
    {
        "id": "9151",
        "title": "Learning Dynamical Coupled Operator For High-dimensional Black-box Partial Differential Equations",
        "authors": "Yichi Wang, Tian Huang, Dandan Huang, Zhaohai Bai, Xuan Wang, Lin Ma, Haodi Zhang",
        "abstract": "The deep operator networks (DON), a class of neural operators that learn mappings between function spaces, have recently emerged as surrogate models for parametric partial differential equations (PDEs). However, their full potential for accurately approximating general black-box PDEs remains underexplored due to challenges in training stability and performance, primarily arising from difficulties in learning mappings between low-dimensional inputs and high-dimensional outputs. Furthermore, inadequate encoding of input functions and query positions limits the generalization ability of DONs. To address these challenges, we propose the Dynamical Coupled Operator (DCO), which incorporates temporal dynamics to learn coupled functions, reducing information loss and improving training robustness. Additionally, we introduce an adaptive spectral  input function encoder based on empirical mode decomposition to enhance input function representation, as well as a hybrid location encoder to improve query location encoding. We provide theoretical guarantees on the universal expressiveness of DCO, ensuring its applicability to a wide range of PDE problems. Extensive experiments on real-world, high-dimensional PDE datasets demonstrate that DCO significantly outperforms DONs.",
        "location": "Guangzhou",
        "day": "August 30th",
        "hour": "11:00",
        "session": "AI4Tech: AI Enabling Technologies (1\/2)"
    },
    {
        "id": "8958",
        "title": "AI4Contracts: LLM & RAG-Powered Encoding of Financial Derivative Contracts",
        "authors": "Maruf Ahmed Mridul, Ian Sloyan, Aparna Gupta, Oshani Seneviratne",
        "abstract": "Large Language Models (LLMs) and Retrieval Augmented Generation (RAG) are reshaping how AI systems extract and organize information from unstructured text. A key challenge is designing AI methods that can incrementally extract, structure, and validate information while preserving hierarchical and contextual relationships. We introduce CDMizer, a template driven, LLM, and RAG-based framework for structured text transformation. By leveraging depth-based retrieval and hierarchical generation, CDMizer ensures a controlled, modular process that aligns generated outputs with predefined schemas. Its template-driven approach guarantees syntactic correctness, schema adherence, and improved scalability, addressing key limitations of direct generation methods. Additionally, we propose an LLM-powered evaluation framework to assess the completeness and accuracy of structured representations. Demonstrated in the transformation of Over-the-Counter (OTC) financial derivative contracts into the Common Domain Model (CDM), CDMizer establishes a scalable foundation for AI-driven document understanding, structured synthesis, and automated validation in broader contexts.",
        "location": "Montreal",
        "day": "August 20th",
        "hour": "10:00",
        "session": "AI4Tech (1\/3)",
        "poster_positions": "From board n93 to board n97"
    },
    {
        "id": "8853",
        "title": "Horae: A Domain-Agnostic Language for Automated Service Regulation",
        "authors": "Yutao Sun, Mingshuai Chen, Tiancheng Zhao, Kangjia Zhao, He Li, Jintao Chen, Zhongyi Wang, Liqiang Lu, Xinkui Zhao, Shuiguang Deng, Jianwei Yin",
        "abstract": "Artificial intelligence is rapidly encroaching on the field of service regulation. However, existing AI-based regulation techniques are often tailored to specific application domains and thus are difficult to generalize in an automated manner. This paper presents Horae, a unified specification language for modeling (multimodal) regulation rules across a diverse set of domains. We showcase how Horae facilitates an intelligent service regulation pipeline by further exploiting a fine-tuned large language model named RuleGPT that automates the Horae modeling process, thereby yielding an end-to-end framework for fully automated intelligent service regulation. The feasibility and effectiveness of our framework are demonstrated over a benchmark of various real-world regulation domains. In particular, we show that our open-sourced, fine-tuned RuleGPT with 7B parameters suffices to outperform GPT-3.5 and perform on par with GPT-4o.",
        "location": "Guangzhou",
        "day": "August 30th",
        "hour": "11:00",
        "session": "AI4Tech: AI Enabling Technologies (1\/2)"
    },
    {
        "id": "8634",
        "title": "CoFinDiff: Controllable Financial Diffusion Model for Time Series Generation",
        "authors": "Yuki Tanaka, Ryuji Hashimoto, Takehiro Takayanagi, Zhe Piao, Yuri Murayama, Kiyoshi Izumi",
        "abstract": "The generation of synthetic financial data is a critical technology in the financial domain, addressing challenges posed by limited data availability. Traditionally, statistical models have been employed to generate synthetic data. However, these models fail to capture the stylized facts commonly observed in financial data, limiting their practical applicability. Recently, machine learning models have been introduced to address the limitations of statistical models; however, controlling synthetic data generation remains challenging. We propose CoFinDiff (Controllable Financial Diffusion model), a synthetic financial data generation model based on conditional diffusion models that accept conditions about the synthetic time series. By incorporating conditions derived from price data into the conditional diffusion model via cross-attention, CoFinDiff learns the relationships between the conditions and the data, generating synthetic data that align with arbitrary conditions. Experimental results demonstrate that: (i) synthetic data generated by CoFinDiff capture stylized facts; (ii) the generated data accurately meet specified conditions for trends and volatility; (iii) the diversity of the generated data surpasses that of the baseline models; and (iv) models trained on CoFinDiff-generated data achieve improved performance in deep hedging task.",
        "location": "Montreal",
        "day": "August 22nd",
        "hour": "10:00",
        "session": "AI4Tech (3\/3)",
        "poster_positions": "From board n53 to board n56"
    },
    {
        "id": "768",
        "title": "Map2Traj: Street Map Piloted Zero-shot Trajectory Generation Method for Wireless Network Optimization",
        "authors": "Zhenyu Tao, Wei Xu, Xiaohu You",
        "abstract": "In modern wireless networks, user mobility modeling plays a pivotal role in learning-based network optimization, particularly in tasks such as user association and resource allocation. Traditional random mobility models, e.g., random waypoint and Gauss-Markov model, often fail to accurately capture the distribution patterns of users within real-world areas. While trace-based mobility models and advanced learning-based trajectory generation methods offer improvements, they are frequently limited by the scarcity of real-world trajectory data in target areas, primarily due to privacy concerns. This paper introduces Map2Traj, a novel zero-shot trajectory generation method that leverages the diffusion model to capture the intrinsic relationship between street maps and user mobility. With solely the street map of an unobserved area, Map2Traj generates synthetic user trajectories that closely resemble the real-world ones in trajectory pattern and spatial distribution. This enables the creation of high-fidelity individual user channel states and an accurate representation of the overall network user distribution, facilitating effective wireless network optimization. Extensive experiments across multiple regions in Xi'an and Chengdu, China demonstrate the effectiveness of our proposed method for zero-shot trajectory generation. A case study applying Map2Traj to user association and load balancing in wireless networks is also presented to validate its efficacy in network optimization.",
        "location": "Guangzhou",
        "day": "August 30th",
        "hour": "11:00",
        "session": "AI4Tech: AI Enabling Technologies (1\/2)"
    },
    {
        "id": "8493",
        "title": "Diversity-Aware Reinforcement Learning for de novo Drug Design",
        "authors": "Hampus Gummesson Svensson, Christian Tyrchan, Ola Engkvist, Morteza Haghir Chehreghani",
        "abstract": "Fine-tuning a pre-trained generative model has demonstrated good performance in generating promising drug molecules. The fine-tuning task is often formulated as a reinforcement learning problem, where previous methods efficiently learn to optimize a reward function to generate potential drug molecules. Nevertheless, in the absence of an adaptive update mechanism for the reward function, the optimization process can become stuck in local optima. The efficacy of the optimal molecule in a local optimization may not translate to usefulness in the subsequent drug optimization process or as a potential standalone clinical candidate. Therefore, it is important to generate a diverse set of promising molecules. Prior work has modified the reward function by penalizing structurally similar molecules, primarily focusing on finding molecules with higher rewards. To date, no study has comprehensively examined how different adaptive update mechanisms for the reward function influence the diversity of generated molecules. In this work, we investigate a wide range of intrinsic motivation methods and strategies to penalize the extrinsic reward, and how they affect the diversity of the set of generated molecules. Our experiments reveal that combining structure- and prediction-based methods generally yields better results in terms of diversity.",
        "location": "Montreal",
        "day": "August 22nd",
        "hour": "10:00",
        "session": "AI4Tech (3\/3)",
        "poster_positions": "From board n53 to board n56"
    },
    {
        "id": "9140",
        "title": "PCAN: A Pandemic-Compatible Attentive Neural Network for Retail Sales Forecasting",
        "authors": "Fan Li, Guoxuan Wang, Huiyu Chu, Dawei Cheng, Xiaoyang Wang",
        "abstract": "The outbreak of pandemic has a huge impact on production and consumption in the business world, especially for the retail sector. As a crucial component of decision-support technology in the retail industry, sales forecasting is significant for production planning and optimizing the supply of essential goods during the pandemic. However, due to the irregular fluctuation pattern caused by uncertainty and the complex temporal correlation between multiple covariates and sales, there is still no effective approach for sales forecasting in this extreme event. To fill this gap, we propose a Pandemic-Compatible Attentive Network (PCAN) for retail sales forecasting. Specifically, to capture the irregular fluctuation patterns from the sales series, we design a fluctuation attention mechanism based on association discrepancy in the time series. Then, a parallel attention module is developed to learn the complex relationship between target sales and various dynamic influence factors in a decoupled manner. Finally, we introduce a novel rectification decoding strategy to indicate fluctuation points in prediction. By evaluating PCAN on four real-world retail food datasets from the SF Express international supply chain system, the results show that our method achieves superior performance over the existing state-of-the-art baselines. The model has been deployed in the supply chain system as a fundamental component to serve a world-leading food retailer.",
        "location": "Guangzhou",
        "day": "August 30th",
        "hour": "11:00",
        "session": "AI4Tech: AI Enabling Technologies (1\/2)"
    }
]