# Parse CSV with auto dialect detection

import csv
from io import StringIO # Used for handling string as file-like object in example

def csv_to_list_of_dicts_auto_detect(file_path, encoding='utf-8', sample_size=1024):
    """
    Read a CSV file into a list of dictionaries, auto-detecting its dialect.

    The delimiter and quote character are sniffed with ``csv.Sniffer`` from a
    sample taken at the start of the file. If sniffing fails, a warning is
    printed and the standard ``'excel'`` dialect (comma, double quote) is used.

    The first row is always treated as the header (no header detection is
    performed); every following row becomes a dictionary keyed by those
    header names. A leading Unicode byte-order mark (e.g. from a "CSV UTF-8"
    export) is skipped so it does not end up inside the first column name.
    The file is opened with ``newline=''`` so the ``csv`` module handles
    Windows/Unix line endings itself.

    Args:
        file_path (str): The path to the CSV file. Must be a non-empty string.
        encoding (str, optional): The encoding of the CSV file. Defaults to 'utf-8'.
                                  Common alternatives include 'latin-1' or 'cp1252'.
        sample_size (int, optional): The number of characters to read from the
                                     beginning of the file to sniff the dialect.
                                     Must be a positive integer. Defaults to 1024.

    Returns:
        list[dict]: One dictionary per data row, in file order. An empty list
                    if the file is empty or contains only a header row.
        None: If the arguments are invalid, the file is not found, it cannot
              be decoded with ``encoding``, or any other error occurs while
              processing. A message describing the problem is printed.
    """
    if not isinstance(file_path, str) or not file_path.strip():
        print("Error: file_path must be a non-empty string.")
        return None
    if not isinstance(sample_size, int) or sample_size <= 0:
        print("Error: sample_size must be a positive integer.")
        return None

    bom = '\ufeff'
    try:
        with open(file_path, 'r', newline='', encoding=encoding) as csvfile:
            sample = csvfile.read(sample_size)
            if not sample:
                # Truly empty file: nothing to sniff, nothing to parse.
                return []

            # The plain 'utf-8' codec keeps a BOM as a regular character;
            # hide it from the sniffer and, below, from the reader.
            has_bom = sample.startswith(bom)
            if has_bom:
                sample = sample[len(bom):]

            try:
                dialect = csv.Sniffer().sniff(sample)
            except csv.Error:
                # Sniffing fails on tiny samples or very unusual formats;
                # fall back to the standard comma/double-quote dialect.
                print(f"Warning: Could not auto-detect CSV dialect for '{file_path}'. "
                      "Falling back to default comma delimiter and double quote character.")
                dialect = 'excel'

            # Rewind so the reader sees the whole file, then step past the
            # BOM (if any) so the first header name is clean.
            csvfile.seek(0)
            if has_bom:
                csvfile.read(len(bom))

            # DictReader takes the first row as the field names.
            return list(csv.DictReader(csvfile, dialect=dialect))
    except FileNotFoundError:
        print(f"Error: The file '{file_path}' was not found.")
        return None
    except UnicodeDecodeError:
        print(f"Error: Could not decode the file '{file_path}' with encoding '{encoding}'. "
              "Try a different encoding (e.g., 'latin-1', 'cp1252').")
        return None
    except Exception as e:
        # Boundary handler: the documented contract is "print and return None"
        # for any other failure (permission errors, malformed CSV, ...).
        print(f"An unexpected error occurred during processing: {e}")
        return None

# --- Example Usage with Auto-Detection ---
if __name__ == "__main__":
    import os

    # Demo inputs as (file name, content) pairs, written to the current
    # working directory and removed again when the demo finishes.
    demo_files = (
        # Standard CSV (comma, double quote)
        (
            "standard_data.csv",
            "name,age,city\n"
            'Alice,30,"New York"\n'
            "Bob,24,London\n"
            '"Charlie, Jr.",35,Paris\n',
        ),
        # Semicolon delimited, with a quoted field containing the delimiter
        (
            "semicolon_data_auto.csv",
            "id;product;price\n"
            "1;Laptop;1200.00\n"
            "2;Mouse;25.50\n"
            '3;"Keyboard; Gaming Edition";75.99\n',
        ),
        # Tab delimited
        (
            "tab_data.csv",
            "Fruit\tColor\tTaste\nApple\tRed\tSweet\nBanana\tYellow\tSweet\nLemon\tYellow\tSour\n",
        ),
        # Mixed line endings; the reader opens the file with newline='' so the
        # csv module deals with both styles itself.
        (
            "mixed_line_data.csv",
            "header1,header2\nvalue1,value2\r\nvalue3,value4\n",
        ),
    )

    try:
        # Write with an explicit encoding that matches the reader's default,
        # rather than relying on the platform's locale encoding.
        for file_name, content in demo_files:
            with open(file_name, "w", newline='', encoding="utf-8") as f:
                f.write(content)

        for file_name, _ in demo_files:
            print(f"--- Processing {file_name} (auto-detect) ---")
            rows = csv_to_list_of_dicts_auto_detect(file_name)
            if rows:
                for row_dict in rows:
                    print(row_dict)
            print("\n")

        print("--- Processing non_existent_file_auto.csv ---")
        non_existent_data_auto = csv_to_list_of_dicts_auto_detect("non_existent_file_auto.csv")
        print(f"Result for non-existent file: {non_existent_data_auto}\n")
    finally:
        # Clean up dummy files even if something above raised; a file may be
        # missing if writing failed part-way through.
        for file_name, _ in demo_files:
            try:
                os.remove(file_name)
            except FileNotFoundError:
                pass