1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
def check_data_format(file_path):
    """Check that every non-empty line of an NER data file has two fields.

    Each line is stripped and split on whitespace. A non-empty line is
    expected to yield exactly two tokens (a character and its tag); any
    line that does not is reported on stdout with its 1-based line number.
    Blank lines are skipped (presumably sentence separators; confirm
    against the data spec).

    Args:
        file_path: Path of the UTF-8 encoded text file to check.

    Returns:
        The number of malformed lines found (0 when the file is clean).

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    bad_lines = 0
    with open(file_path, 'r', encoding='utf-8') as f:
        for line_num, line in enumerate(f, 1):
            line = line.strip()
            if not line:
                # Blank lines carry no token/tag pair; nothing to validate.
                continue
            if len(line.split()) != 2:
                bad_lines += 1
                print(f"错误:第{line_num}行格式不正确")
                print(f"内容: '{line}'")
                print("期望格式: '字符 标签'")
                print("---")
    return bad_lines
if __name__ == "__main__":
    import sys

    # Script entry point: validate the file named by the first command-line
    # argument, falling back to the original default path when none is given.
    check_data_format(sys.argv[1] if len(sys.argv) > 1 else "data/sms_ner.txt")