cloudroam
2025-04-14 6f5277ea8bde6be1aa1998315a77aed9c0809900
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
def check_data_format(file_path):
    """Check that every non-blank line of an NER data file has two fields.

    Each line is stripped and split on whitespace. Lines that are empty
    after stripping are skipped. Any other line that does not split into
    exactly two fields is reported on standard output (line number, the
    stripped content, the expected format, and a ``---`` separator).

    Args:
        file_path: Path of the text file to check; it is read as UTF-8.

    Returns:
        A list with the 1-based line numbers of the malformed lines, in
        file order. The list is empty when no malformed line was found.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    bad_line_numbers = []
    with open(file_path, 'r', encoding='utf-8') as f:
        for line_num, raw_line in enumerate(f, 1):
            line = raw_line.strip()
            if not line:
                # Blank (or whitespace-only) lines are not checked.
                continue
            if len(line.split()) == 2:
                continue
            # Record the failure so callers can act on the result instead
            # of having to parse the printed report.
            bad_line_numbers.append(line_num)
            print(f"错误:第{line_num}行格式不正确")
            print(f"内容: '{line}'")
            print("期望格式: '字符 标签'")
            print("---")
    return bad_line_numbers
 
# Script entry point: when executed directly, run the format check on the
# hard-coded data file path below.
if __name__ == "__main__":
    target_path = "data/sms_ner.txt"
    check_data_format(target_path)