fix
cloudroam
6 天以前 e6fed94443177826cf7497a85e9cdcfc7c43ee21
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
from ner_config import RepaymentNERConfig, FlightNERConfig, TrainNERConfig
 
 
# Script: scan the NER data file and report labels that are not in the configured label set.
 
def check_data_labels(config=None):
    """Report every label in an NER data file that the config does not declare.

    The file at ``config.DATA_PATH`` is read as UTF-8, line by line. Lines
    that are empty after stripping are skipped. Every other line is split on
    its first run of whitespace; the text after that split is treated as the
    label and checked for membership in ``config.LABELS``. Each offending
    line is printed as it is found, followed by a sorted summary of the
    distinct illegal labels.

    Args:
        config: Object exposing ``DATA_PATH`` and ``LABELS`` attributes.
            When omitted, ``FlightNERConfig`` is used, so a bare
            ``check_data_labels()`` call behaves as it always has.

    Returns:
        The set of distinct labels found in the file that are not in
        ``config.LABELS`` (empty when the file is clean).

    Raises:
        OSError: If the data file cannot be opened.
        AttributeError: If ``config`` lacks ``DATA_PATH`` or ``LABELS``.
    """
    if config is None:
        # Resolved at call time so the default tracks the module-level import.
        config = FlightNERConfig
    valid_labels = config.LABELS

    illegal_labels = set()
    with open(config.DATA_PATH, 'r', encoding='utf-8') as f:
        for line_num, raw_line in enumerate(f, start=1):
            line = raw_line.strip()
            if not line:
                continue

            # Only the unpacking can legitimately fail (a line holding a
            # single token); anything else is a real bug and must surface
            # instead of being printed once per line.
            try:
                _, label = line.split(maxsplit=1)
            except ValueError as e:
                print(f"行 {line_num}: 处理出错 - {str(e)}")
                continue

            if label not in valid_labels:
                print(f"行 {line_num}: 发现非法标签 '{label}'")
                illegal_labels.add(label)

    print("\n发现的所有非法标签:")
    for label in sorted(illegal_labels):
        print(f"- {label}")

    return illegal_labels
 
# Run the label check only when this file is executed directly as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    check_data_labels()