1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
| import csv from datetime import datetime
def validate_id_number(id_num):
    """Check the format and check character of an 18-character ID number.

    The first 17 characters must be ASCII digits and the last character a
    digit or the letter X (either case).  The expected check character is
    looked up from the weighted sum of the first 17 digits modulo 11.

    Args:
        id_num: Candidate ID number.  Anything that is not a ``str`` is
            rejected rather than raising.

    Returns:
        True if the format is valid and the last character equals the
        computed check character, False otherwise.
    """
    if not isinstance(id_num, str) or len(id_num) != 18:
        return False

    ascii_digits = set('0123456789')
    body = id_num[:17]
    # Normalise the case first so a trailing lowercase 'x' is accepted.
    check_char = id_num[17].upper()

    # str.isdigit() also accepts characters such as '²' that int() cannot
    # convert, so membership in the ASCII digit set is tested instead.
    if not set(body) <= ascii_digits:
        return False
    if check_char != 'X' and check_char not in ascii_digits:
        return False

    weights = (7, 9, 10, 5, 8, 4, 2, 1, 6, 3, 7, 9, 10, 5, 8, 4, 2)
    # Position = weighted sum mod 11, value = expected check character.
    check_chars = '10X98765432'
    total = sum(int(digit) * weight for digit, weight in zip(body, weights))
    return check_char == check_chars[total % 11]
def validate_gender(id_num, gender):
    """Check that ``gender`` agrees with the 17th character of the ID number.

    An odd digit at index 16 corresponds to '男' and an even digit to '女'.

    Args:
        id_num: ID number string; must be exactly 18 characters long.
        gender: Gender value to compare against.

    Returns:
        True if ``gender`` matches the value implied by the ID number;
        False if it does not, if the length is not 18, or if the character
        at index 16 cannot be converted to an integer.
    """
    if len(id_num) != 18:
        return False
    try:
        parity_is_odd = int(id_num[16]) % 2 == 1
    except ValueError:
        return False
    expected_gender = '男' if parity_is_odd else '女'
    return gender == expected_gender
def validate_birth_date(id_num, birth_date):
    """Check that ``birth_date`` equals the date embedded in the ID number.

    Characters 7-14 of the ID number (indices 6 to 13) are read as
    ``YYYYMMDD`` and reformatted as ``YYYY-MM-DD`` for an exact string
    comparison; no calendar validation is performed.

    Args:
        id_num: ID number string; must be exactly 18 characters long.
        birth_date: Birth date text, expected in ``YYYY-MM-DD`` form.

    Returns:
        True if the strings match exactly, False otherwise (including when
        the ID number is not 18 characters long).
    """
    if len(id_num) != 18:
        return False
    embedded_date = f"{id_num[6:10]}-{id_num[10:12]}-{id_num[12:14]}"
    return birth_date == embedded_date
def validate_phone(phone):
    """Check that ``phone`` looks like an 11-digit number starting with '1'.

    Args:
        phone: Phone number string.

    Returns:
        True if the string has exactly 11 characters, ``str.isdigit()`` is
        true for it, and its first character is '1'; False otherwise.
    """
    return len(phone) == 11 and phone.isdigit() and phone[0] == '1'
def validate_time_logic(birth_date_str, register_time_str, last_login_time_str):
    """Check that birth, registration and last login are in a sane order.

    Args:
        birth_date_str: Birth date in ``%Y-%m-%d`` format.
        register_time_str: Registration time in ``%Y-%m-%d %H:%M:%S`` format.
        last_login_time_str: Last login time in ``%Y-%m-%d %H:%M:%S`` format.

    Returns:
        True if all three values parse, the birth date is not after the
        registration date, and the registration time is not after the last
        login time.  False otherwise, including when a value is not a
        string (for example ``None``) or does not match its format.
    """
    timestamp_format = '%Y-%m-%d %H:%M:%S'
    try:
        birth_date = datetime.strptime(birth_date_str, '%Y-%m-%d').date()
        register_time = datetime.strptime(register_time_str, timestamp_format)
        last_login_time = datetime.strptime(last_login_time_str, timestamp_format)
    except (ValueError, TypeError):
        # ValueError: text does not match the format.
        # TypeError: the value is not a string at all (e.g. None).
        return False

    return birth_date <= register_time.date() and register_time <= last_login_time
def validate_name(name):
    """Check that ``name`` is 2 to 4 characters, all within U+4E00..U+9FA5.

    Args:
        name: Name string to check.

    Returns:
        True if the length is between 2 and 4 inclusive and every character
        falls in the range ``'\\u4e00'`` to ``'\\u9fa5'``; False otherwise.
    """
    if not 2 <= len(name) <= 4:
        return False
    return all('\u4e00' <= ch <= '\u9fa5' for ch in name)
input_file = 'data.csv'
output_file = 'clean_data.csv'

# Copy every row of input_file that passes all validators into output_file,
# keeping the input's header and column order.
# NOTE(review): no encoding is passed to open(), so both files use the
# platform default — confirm that matches how data.csv is actually encoded.
with open(input_file, 'r', newline='') as infile, \
        open(output_file, 'w', newline='') as outfile:
    reader = csv.DictReader(infile)
    fieldnames = reader.fieldnames
    # fieldnames is None when the input has no header row (e.g. an empty
    # file); DictWriter cannot write a None header, and there are no rows.
    if fieldnames is not None:
        writer = csv.DictWriter(outfile, fieldnames=fieldnames)
        writer.writeheader()
        for row in reader:
            # DictReader stores surplus fields under the key None and fills
            # missing fields with the value None.  The former makes
            # writer.writerow() raise and the latter breaks the validators,
            # so rows whose field count differs from the header are dropped.
            if None in row or None in row.values():
                continue
            # A header lacking one of the columns below still raises KeyError
            # when that lookup is reached.
            if (validate_id_number(row['身份证号'])
                    and validate_gender(row['身份证号'], row['性别'])
                    and validate_birth_date(row['身份证号'], row['出生日期'])
                    and validate_phone(row['手机号'])
                    and validate_time_logic(row['出生日期'], row['注册时间'], row['最后登录时间'])
                    and validate_name(row['姓名'])):
                writer.writerow(row)
|