import codecs
import csv
from io import StringIO
def csv_to_list_of_dicts_auto_detect(file_path, encoding='utf-8', sample_size=1024):
    """
    Read a CSV file into a list of dictionaries, auto-detecting its dialect.

    The delimiter and quote character are sniffed from a sample taken at the
    start of the file. The first row is always treated as the header row and
    supplies the dictionary keys; every following row becomes one dictionary.
    The file is opened with ``newline=''`` so the csv module handles Windows
    and Unix line endings itself.

    Args:
        file_path (str): The path to the CSV file.
        encoding (str, optional): The encoding of the CSV file. Defaults to
            'utf-8'. Common alternatives include 'latin-1' or 'cp1252'. When
            the encoding resolves to UTF-8, a leading byte-order mark is
            skipped so it does not end up in the first header name.
        sample_size (int, optional): The maximum number of characters read
            from the beginning of the file to sniff the dialect. Defaults to
            1024. Larger samples can improve detection but take longer.

    Returns:
        list[dict] | None: One dictionary per data row. An empty list if the
        file is empty or contains no data rows. None if an argument is
        invalid or the file cannot be read (not found, decoding failure, or
        any other error); a message describing the problem is printed.
    """
    if not isinstance(file_path, str) or not file_path.strip():
        print("Error: file_path must be a non-empty string.")
        return None
    if not isinstance(sample_size, int) or sample_size <= 0:
        print("Error: sample_size must be a positive integer.")
        return None

    try:
        # Files exported as "UTF-8 with BOM" would otherwise yield a first
        # header key of '\ufeff<name>'. 'utf-8-sig' strips the mark when it is
        # present and behaves exactly like 'utf-8' when it is not.
        read_encoding = encoding
        if isinstance(encoding, str) and codecs.lookup(encoding).name == 'utf-8':
            read_encoding = 'utf-8-sig'

        with open(file_path, 'r', newline='', encoding=read_encoding) as csvfile:
            sample = csvfile.read(sample_size)
            if not sample:
                # Empty file: nothing to sniff and nothing to parse, so skip
                # the sniffer (it would fail and emit a misleading warning).
                return []
            csvfile.seek(0)

            try:
                dialect = csv.Sniffer().sniff(sample)
            except csv.Error:
                print(f"Warning: Could not auto-detect CSV dialect for '{file_path}'. "
                      "Falling back to default comma delimiter and double quote character.")
                dialect = 'excel'

            return list(csv.DictReader(csvfile, dialect=dialect))
    except FileNotFoundError:
        print(f"Error: The file '{file_path}' was not found.")
        return None
    except UnicodeDecodeError:
        print(f"Error: Could not decode the file '{file_path}' with encoding '{encoding}'. "
              "Try a different encoding (e.g., 'latin-1', 'cp1252').")
        return None
    except Exception as e:
        # Boundary handler: the documented contract is "return None on any
        # failure", so unexpected errors are reported rather than propagated.
        print(f"An unexpected error occurred during processing: {e}")
        return None
if __name__ == "__main__":
    # Demo: write a few sample CSV files in different dialects to the current
    # directory, parse each with csv_to_list_of_dicts_auto_detect, print the
    # resulting rows, then delete the sample files again.
    import os

    # File name -> file content, in the order the demo processes them.
    sample_files = {
        "standard_data.csv": (
            'name,age,city\n'
            'Alice,30,"New York"\n'
            'Bob,24,London\n'
            '"Charlie, Jr.",35,Paris\n'
        ),
        "semicolon_data_auto.csv": (
            'id;product;price\n'
            '1;Laptop;1200.00\n'
            '2;Mouse;25.50\n'
            '3;"Keyboard; Gaming Edition";75.99\n'
        ),
        "tab_data.csv": (
            "Fruit\tColor\tTaste\nApple\tRed\tSweet\nBanana\tYellow\tSweet\nLemon\tYellow\tSour\n"
        ),
        "mixed_line_data.csv": "header1,header2\nvalue1,value2\r\nvalue3,value4\n",
    }

    try:
        for file_name, content in sample_files.items():
            # newline='' writes the line endings exactly as given above;
            # the encoding matches the reader's default of UTF-8.
            with open(file_name, "w", newline='', encoding='utf-8') as f:
                f.write(content)

        for file_name in sample_files:
            print(f"--- Processing {file_name} (auto-detect) ---")
            rows = csv_to_list_of_dicts_auto_detect(file_name)
            if rows:
                for row_dict in rows:
                    print(row_dict)
            print("\n")

        # A missing file is reported by the function and yields None.
        print("--- Processing non_existent_file_auto.csv ---")
        non_existent_data_auto = csv_to_list_of_dicts_auto_detect("non_existent_file_auto.csv")
        print(f"Result for non-existent file: {non_existent_data_auto}\n")
    finally:
        # Always clean up, even if a step above failed part-way through;
        # only remove files that were actually created.
        for file_name in sample_files:
            if os.path.exists(file_name):
                os.remove(file_name)