AlkantarClanX12

Your IP : 3.147.86.143


Current Path : /home/thanudqk/www/wp-content/plugins/tablepress/libraries/
Upload File :
Current File : /home/thanudqk/www/wp-content/plugins/tablepress/libraries/csv-parser.class.php

<?php
/**
 * CSV Parsing class for TablePress, used for import of CSV files
 *
 * @package TablePress
 * @subpackage Import
 * @author Tobias Bäthge
 * @since 1.0.0
 */

// Stop right away if this file is requested directly, i.e. if the loading
// application has not defined the ABSPATH constant before including it.
if ( ! defined( 'ABSPATH' ) ) {
	die( 'No direct script access allowed!' );
}

/**
 * CSV Parsing class
 *
 * @package TablePress
 * @subpackage Import
 * @author Tobias Bäthge
 * @since 1.0.0
 */
class CSV_Parser {

	/**
	 * The character that encloses a cell. Defaults to the quotation mark ".
	 *
	 * @since 1.0.0
	 */
	protected string $enclosure = '"';

	/**
	 * Maximum number of rows that are analyzed when attempting to auto-detect the CSV delimiter.
	 *
	 * @since 1.0.0
	 */
	protected int $delimiter_search_max_lines = 15;

	/**
	 * Characters that are never considered as a delimiter during auto-detection.
	 * The string is inserted verbatim into a regular expression character class, see `find_delimiter()`.
	 *
	 * @since 1.0.0
	 */
	protected string $non_delimiter_chars = "a-zA-Z0-9\n\r";

	/**
	 * The preferred delimiter characters, in order of preference. The position of a character in this list
	 * becomes the leading digit of its sort key, so that preferred characters win against other candidates.
	 * There must not be more than 9 characters in the preferred delimiter character list, see `_check_delimiter_count()`.
	 *
	 * @since 1.0.0
	 */
	protected string $preferred_delimiter_chars = ";,\t";

	/**
	 * The CSV data string that shall be parsed to an array.
	 * `load_data()` guarantees that it ends with a line break.
	 *
	 * @since 1.0.0
	 */
	protected string $import_data = '';

	/**
	 * The error state while parsing input data.
	 *
	 * 0 = No errors found. Everything should be fine.
	 * 1 = A hopefully correctable syntax error was found.
	 * 2 = The enclosure character was found in a non-enclosed field. This means the file is either corrupt,
	 *     or does not follow the common CSV standard. Please validate the parsed data manually.
	 *
	 * @since 1.0.0
	 */
	public int $error = 0;

	/**
	 * Detailed error information, keyed by "{line}-{column}" (both 1-based) of the affected cell.
	 * Each entry holds the keys `type`, `info`, `line`, and `column`.
	 *
	 * @since 1.0.0
	 * @var array<string, array<string, int|string>>
	 */
	public array $error_info = array();

	/**
	 * Class Constructor.
	 *
	 * @since 1.0.0
	 */
	public function __construct() {
		// Unused.
	}

	/**
	 * Load CSV data that shall be parsed.
	 *
	 * A trailing line break is appended if the data does not end with one, because both
	 * `find_delimiter()` and `parse()` only complete a row when they encounter a line break.
	 *
	 * @since 1.0.0
	 *
	 * @param string $data Data to be parsed.
	 */
	public function load_data( string $data ): void {
		// Check for mandatory trailing line break.
		if ( ! str_ends_with( $data, "\n" ) ) {
			$data .= "\n";
		}
		$this->import_data = $data;
	}

	/**
	 * Detect the CSV delimiter, by analyzing some rows to determine the most probable delimiter character.
	 *
	 * Every non-enclosed character that is not listed in `$non_delimiter_chars` is counted per line.
	 * Characters that appear equally often (or once more) in every analyzed line are ranked with
	 * `_check_delimiter_count()`. If no character qualifies, the character that appears in the most
	 * lines is used, and if nothing was counted at all, a comma is returned.
	 *
	 * @since 1.0.0
	 *
	 * @return string Most probable delimiter character.
	 */
	public function find_delimiter(): string {
		$data = $this->import_data;

		$delimiter_count = array();
		$enclosed = false;
		$current_line = 0;

		// The exclusion pattern does not change inside the loop, so build it only once.
		$non_delimiter_pattern = '#[' . $this->non_delimiter_chars . ']#i';
		// Cache of "can this character be a delimiter?" per distinct character, to avoid running the regular expression for every character.
		$is_candidate = array();

		// Walk through each character in the CSV string (up to $this->delimiter_search_max_lines) and search potential delimiter characters.
		$data_length = strlen( $data );
		for ( $i = 0; $i < $data_length; $i++ ) {
			$prev_char = ( $i - 1 >= 0 ) ? $data[ $i - 1 ] : '';
			$curr_char = $data[ $i ];
			$next_char = ( $i + 1 < $data_length ) ? $data[ $i + 1 ] : '';

			if ( $curr_char === $this->enclosure ) {
				// Open and closing quotes.
				if ( ! $enclosed || $next_char !== $this->enclosure ) {
					$enclosed = ! $enclosed; // Flip bool.
				} else {
					// Escaped enclosure ("") inside an enclosed cell.
					++$i; // Skip next character.
				}
			} elseif ( ( ( "\n" === $curr_char && "\r" !== $prev_char ) || "\r" === $curr_char ) && ! $enclosed ) {
				// Reached end of a line. The "\n" of a "\r\n" pair is not counted again.
				++$current_line;
				if ( $current_line >= $this->delimiter_search_max_lines ) {
					break;
				}
			} elseif ( ! $enclosed ) {
				// At this point, $curr_char seems to be used as a delimiter, as it is not enclosed.
				// Count $curr_char if it is not in the $this->non_delimiter_chars list.
				if ( ! isset( $is_candidate[ $curr_char ] ) ) {
					$is_candidate[ $curr_char ] = ( 0 === preg_match( $non_delimiter_pattern, $curr_char ) );
				}
				if ( $is_candidate[ $curr_char ] ) {
					if ( ! isset( $delimiter_count[ $curr_char ][ $current_line ] ) ) {
						$delimiter_count[ $curr_char ][ $current_line ] = 0; // Initialize empty.
					}
					++$delimiter_count[ $curr_char ][ $current_line ];
				}
			}
		}

		// Find most probable delimiter, by sorting the candidates by their sort key.
		$potential_delimiters = array();
		foreach ( $delimiter_count as $char => $line_counts ) {
			// Array keys that look like integers are converted to int by PHP, so cast back to string.
			$is_possible_delimiter = $this->_check_delimiter_count( (string) $char, $line_counts, $current_line );
			if ( false !== $is_possible_delimiter ) {
				$potential_delimiters[ $is_possible_delimiter ] = $char;
			}
		}
		ksort( $potential_delimiters );

		// If no valid delimiter was found, use the character that was found in most rows.
		if ( empty( $potential_delimiters ) ) {
			$delimiter_counts = array_map( 'count', $delimiter_count );
			arsort( $delimiter_counts, SORT_NUMERIC );
			$potential_delimiters = array_keys( $delimiter_counts );
		}

		// If still no delimiter was found, fall back to a comma.
		if ( empty( $potential_delimiters ) ) {
			$potential_delimiters = array( ',' );
		}

		// Return first array element, as that is the best ranked candidate (lowest sort key, or found in most rows).
		return (string) array_shift( $potential_delimiters );
	}

	/**
	 * Check if passed character can be a delimiter, by checking counts in each line.
	 *
	 * A character qualifies if it was found in every analyzed line, and if its count in every line
	 * equals the count of the first line, or is exactly one higher (an "almost" match).
	 *
	 * The returned sort key has the form "{pref}{match}.{99999 - count}", where `pref` is the position
	 * in `$preferred_delimiter_chars` (9 if not preferred), and `match` is 1 for an exact and 2 for an
	 * "almost" match. A smaller key means a more probable delimiter.
	 *
	 * @since 1.0.0
	 *
	 * @param string $char         Character to check.
	 * @param int[]  $line_counts  Counts for the characters in the lines.
	 * @param int    $number_lines Number of lines.
	 * @return bool|string False if delimiter is not possible, string to be used as a sort key if character could be a delimiter.
	 */
	protected function _check_delimiter_count( string $char, array $line_counts, int $number_lines ) /* : bool|string */ {
		// Was the potential delimiter found in every line?
		if ( count( $line_counts ) !== $number_lines ) {
			return false;
		}

		// Check if the count in every line is the same (or one higher for an "almost").
		$first = null;
		$equal = null; // Stays null while only one line was seen, and is false for good after the first mismatch.
		$almost = false;
		foreach ( $line_counts as $count ) {
			if ( is_null( $first ) ) {
				$first = $count;
			} elseif ( $count === $first && false !== $equal ) {
				$equal = true;
			} elseif ( $count === $first + 1 && false !== $equal ) {
				$equal = true;
				$almost = true;
			} else {
				$equal = false;
			}
		}
		// Check equality only if there's more than one line.
		if ( $number_lines > 1 && ! $equal ) {
			return false;
		}

		// At this point, count is equal in all lines, so determine a string to sort priority.
		$match = ( $almost ) ? 2 : 1;
		// There must not be more than 9 characters in the preferred delimiter character list.
		$pref = strpos( $this->preferred_delimiter_chars, $char );
		if ( false === $pref ) {
			$pref = 9;
		}
		// A higher count gives a smaller key, so that it is sorted first.
		return $pref . $match . '.' . ( 99999 - $first );
	}

	/**
	 * Parse CSV string into a two-dimensional array.
	 *
	 * Cells are separated by `$delimiter`, rows by "\n", "\r", or "\r\n". Cells can be wrapped in the
	 * enclosure character, in which case they can contain delimiters and line breaks, and the enclosure
	 * character itself if it is doubled. Non-enclosed cells are trimmed, enclosed cells are kept as they are.
	 * Whitespace between a closing enclosure and the following delimiter is ignored.
	 *
	 * Syntax problems do not stop the parsing. They are recorded in `$this->error` and `$this->error_info`.
	 * A row is only added to the result when its line break is reached outside of an enclosed cell.
	 *
	 * @since 1.0.0
	 *
	 * @param string $delimiter Delimiter character for the CSV parsing.
	 * @return array<int, array<int, string>> Two-dimensional array with the data from the CSV string.
	 */
	public function parse( string $delimiter ): array {
		$data = $this->import_data;

		// Filter delimiter from the list, if it is a whitespace character.
		$white_spaces = str_replace( $delimiter, '', " \t\x0B\0" );

		$rows = array(); // Complete rows.
		$row = array(); // Row that is currently built.
		$column = 0; // Current column index.
		$cell_content = ''; // Content of the currently processed cell.
		$enclosed = false;
		$was_enclosed = false; // Enclosed cells keep their whitespace, only non-enclosed cells are trimmed.

		// Walk through each character in the CSV string.
		$data_length = strlen( $data );
		for ( $i = 0; $i < $data_length; $i++ ) {
			$curr_char = $data[ $i ];
			$next_char = ( $i + 1 < $data_length ) ? $data[ $i + 1 ] : '';

			if ( $curr_char === $this->enclosure ) {
				// Open/close quotes, and inline quotes.
				if ( ! $enclosed ) {
					if ( '' === ltrim( $cell_content, $white_spaces ) ) {
						// Only whitespace was found in the cell so far, so this is an opening enclosure.
						$enclosed = true;
						$was_enclosed = true;
					} else {
						// Enclosure character in the middle of a non-enclosed cell: keep it, but report it.
						$this->error = 2;
						$error_line = count( $rows ) + 1;
						$error_column = $column + 1;
						if ( ! isset( $this->error_info[ "{$error_line}-{$error_column}" ] ) ) {
							$this->error_info[ "{$error_line}-{$error_column}" ] = array(
								'type'   => 2,
								'info'   => "Syntax error found in line {$error_line}. Non-enclosed fields can not contain double-quotes.",
								'line'   => $error_line,
								'column' => $error_column,
							);
						}
						$cell_content .= $curr_char;
					}
				} elseif ( $next_char === $this->enclosure ) {
					// Enclosure character within enclosed cell (" encoded as "").
					$cell_content .= $curr_char;
					++$i; // Skip next character.
				} elseif ( $next_char !== $delimiter && "\r" !== $next_char && "\n" !== $next_char ) {
					// The enclosure is not directly followed by the end of the cell, so look behind any whitespace that follows it.
					// for-loop (instead of while-loop) that skips whitespace.
					for ( $x = ( $i + 1 ); isset( $data[ $x ] ) && '' === ltrim( $data[ $x ], $white_spaces ); $x++ ) { // phpcs:ignore Generic.CodeAnalysis.ForLoopWithTestFunctionCall.NotAllowed,Generic.CodeAnalysis.EmptyStatement.DetectedFor
						// Action is in iterator check.
					}
					// $x can point behind the end of the data, so it has to be checked before it is read.
					if ( isset( $data[ $x ] ) && $data[ $x ] === $delimiter ) {
						// The " was the closing one for the cell, followed by whitespace that is ignored.
						$enclosed = false;
						// Continue directly in front of the delimiter, so that the next iteration processes it and finishes the cell.
						$i = $x - 1;
					} else {
						if ( $this->error < 1 ) {
							$this->error = 1;
						}
						$error_line = count( $rows ) + 1;
						$error_column = $column + 1;
						if ( ! isset( $this->error_info[ "{$error_line}-{$error_column}" ] ) ) {
							$this->error_info[ "{$error_line}-{$error_column}" ] = array(
								'type'   => 1,
								'info'   => "Syntax error found in line {$error_line}. A single double-quote was found within an enclosed string. Enclosed double-quotes must be escaped with a second double-quote.",
								'line'   => $error_line,
								'column' => $error_column,
							);
						}
						$cell_content .= $curr_char;
						$enclosed = false;
					}
				} else {
					// The " was the closing one for the cell.
					$enclosed = false;
				}
			} elseif ( ( $curr_char === $delimiter || "\n" === $curr_char || "\r" === $curr_char ) && ! $enclosed ) {
				// End of cell (by $delimiter), or end of line (by line break, and not enclosed!).

				$row[ $column ] = ( $was_enclosed ) ? $cell_content : trim( $cell_content );
				$cell_content = '';
				$was_enclosed = false;
				++$column;

				// End of line.
				if ( "\n" === $curr_char || "\r" === $curr_char ) {
					// Append completed row.
					$rows[] = $row;
					$row = array();
					$column = 0;
					if ( "\r" === $curr_char && "\n" === $next_char ) {
						// Skip next character in \r\n line breaks.
						++$i;
					}
				}
			} else {
				// Append character to current cell.
				$cell_content .= $curr_char;
			}
		}

		return $rows;
	}

} // class CSV_Parser