AlkantarClanX12

Your IP : 3.149.250.65


Current Path : /home/thanudqk/public_html/wp-content/plugins/tablepress/libraries/
Upload File :
Current File : //home/thanudqk/public_html/wp-content/plugins/tablepress/libraries/html-parser.class.php

<?php
/**
 * HTML Parsing class for TablePress, used for import of HTML files.
 *
 * @package TablePress
 * @subpackage Import
 * @author Tobias Bäthge
 * @since 2.0.0
 */

// Prohibit direct script loading.
defined( 'ABSPATH' ) || die( 'No direct script access allowed!' );

/**
 * HTML Parsing class
 *
 * @package TablePress
 * @subpackage Import
 * @author Tobias Bäthge
 * @since 2.0.0
 */
abstract class HTML_Parser {

	/**
	 * Parses HTML string into a two-dimensional array, maybe with options.
	 *
	 * Only the first `<table>` element in the HTML is imported. Rows are collected in the order
	 * `<thead>`, `<tbody>`, rows that are direct children of `<table>`, `<tfoot>`. The number of
	 * head and foot rows is reported in the `options` entry, if those sections exist.
	 *
	 * The libxml "internal errors" setting that is changed for parsing is restored to its previous
	 * value before the method returns, so that other code in the same request is not affected.
	 *
	 * @since 2.0.0
	 *
	 * @param string $html Data to be parsed.
	 * @return array<string, mixed>|WP_Error Array with table data and options (current table head and foot row) on success, WP_Error on error.
	 */
	public static function parse( string $html ) /* : array|WP_Error */ {
		if ( false === stripos( $html, '<table' ) || false === stripos( $html, '</table>' ) ) {
			return new WP_Error( 'table_import_html_no_table_found' );
		}

		// Prepend XML declaration, for better encoding support.
		$full_html = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>' . $html;
		if ( function_exists( 'libxml_disable_entity_loader' ) ) {
			/*
			 * Don't expand external entities, see https://websec.io/2012/08/27/Preventing-XXE-in-PHP.html.
			 * Silence warnings as the function is deprecated in PHP 8, but can be necessary with LIBXML_NOENT being defined, see https://core.trac.wordpress.org/changeset/50714.
			 */
			@libxml_disable_entity_loader( true ); // phpcs:ignore WordPress.PHP.NoSilencedErrors.Discouraged,Generic.PHP.DeprecatedFunctions.Deprecated
		}

		// No warnings/errors raised, but stored internally. Remember the previous setting, to restore it afterwards.
		$previous_use_internal_errors = libxml_use_internal_errors( true );

		try {
			$dom = new DOMDocument( '1.0', 'UTF-8' );
			// No strict checking for invalid HTML.
			$dom->strictErrorChecking = false; // phpcs:ignore WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase
			$result = $dom->loadHTML( $full_html );
			if ( ! $result ) {
				return new WP_Error( 'table_import_html_dom_load_html_failed' );
			}
			$dom_tables = $dom->getElementsByTagName( 'table' );
			if ( 0 === count( $dom_tables ) ) {
				return new WP_Error( 'table_import_html_dom_get_tables' );
			}
			libxml_clear_errors(); // Clear errors so that we only catch those inside the table in the next line.
			$table = simplexml_import_dom( $dom_tables->item( 0 ) ); // @phpstan-ignore argument.type
			if ( ! $table instanceof SimpleXMLElement ) {
				return new WP_Error( 'table_import_html_simplexml_import_dom_failed' );
			}

			$errors = libxml_get_errors();
			libxml_clear_errors();
			if ( ! empty( $errors ) ) {
				wp_die( self::_format_libxml_errors( $errors ), 'Import Error', array( 'response' => 200, 'back_link' => true ) );
			}

			$html_table = array(
				'data'    => array(),
				'options' => array(),
			);
			if ( isset( $table->thead ) ) {
				$head_rows = self::_import_html_rows( $table->thead[0]->tr ); // @phpstan-ignore property.nonObject
				$html_table['data'] = array_merge( $html_table['data'], $head_rows );
				$html_table['options']['table_head'] = count( $head_rows );
			}
			if ( isset( $table->tbody ) ) {
				$html_table['data'] = array_merge( $html_table['data'], self::_import_html_rows( $table->tbody[0]->tr ) ); // @phpstan-ignore property.nonObject
			}
			if ( isset( $table->tr ) ) {
				$html_table['data'] = array_merge( $html_table['data'], self::_import_html_rows( $table->tr ) );
			}
			if ( isset( $table->tfoot ) ) {
				$foot_rows = self::_import_html_rows( $table->tfoot[0]->tr ); // @phpstan-ignore property.nonObject
				$html_table['data'] = array_merge( $html_table['data'], $foot_rows );
				$html_table['options']['table_foot'] = count( $foot_rows );
			}

			return $html_table;
		} finally {
			// Drop errors that were collected on early-return paths and restore the caller's libxml error handling mode.
			libxml_clear_errors();
			libxml_use_internal_errors( $previous_use_internal_errors );
		}
	}

	/**
	 * Builds the HTML message that lists the libxml errors found in the imported table.
	 *
	 * Only warnings, errors, and fatal errors are listed; entries with other levels are skipped.
	 * The libxml message text is derived from the imported file, so it is escaped before it is
	 * placed into the HTML output.
	 *
	 * @param array<int, LibXMLError> $errors Errors as returned by libxml_get_errors().
	 * @return string HTML code with one line per reported error.
	 */
	private static function _format_libxml_errors( array $errors ): string {
		$level_labels = array(
			LIBXML_ERR_WARNING => 'Warning',
			LIBXML_ERR_ERROR   => 'Error',
			LIBXML_ERR_FATAL   => 'Fatal Error',
		);

		$output = '<strong>' . __( 'The imported file contains errors:', 'tablepress' ) . '</strong><br><br>';
		foreach ( $errors as $error ) {
			if ( ! isset( $level_labels[ $error->level ] ) ) {
				continue;
			}
			$label = $level_labels[ $error->level ];
			$message = htmlspecialchars( $error->message, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8' );
			$output .= "{$label} {$error->code}: {$message} in line {$error->line}, column {$error->column}<br>";
		}

		return $output;
	}

	/**
	 * Converts table HTML rows to an array.
	 *
	 * Merged cells are represented by trigger words: `#colspan#` for cells that are covered by a
	 * `colspan` of the cell to their left, `#rowspan#` for cells that are covered by a `rowspan` of
	 * the cell above them, and `#span#` for the remaining cells of an area that is merged in both directions.
	 *
	 * @since 2.0.0
	 *
	 * @param SimpleXMLElement $element XMLElement.
	 * @return array<int, array<int, string>> SimpleXMLElement exported to an array.
	 */
	protected static function _import_html_rows( SimpleXMLElement $element ): array {
		$rows = array(); // Container for the table data.
		$rowspans = array(); // Container for information about rowspans in rows that follow the currently processed row, indexed by row and column.

		$row_idx = 0;
		foreach ( $element as $row ) {
			// If all cells in a row should be merged with the cells in the row above, add the trigger word to each of them (should be very rare).
			while ( isset( $rowspans[ $row_idx ] ) && count( $rowspans[ $row_idx ] ) === count( $rows[ $row_idx - 1 ] ) ) { // phpcs:ignore Squiz.PHP.DisallowSizeFunctionsInLoops.Found
				// The entries can stem from different rows above, so bring them into column order first.
				ksort( $rowspans[ $row_idx ] );
				$rows[] = array_values( $rowspans[ $row_idx ] );
				++$row_idx;
			}

			$new_row = array();
			// Index of the next free column in the row. This always equals count( $new_row ), i.e. it also counts cells that were added for col- and rowspans.
			$column_idx = 0;
			foreach ( $row as $cell ) {
				// If a cell in a row should be merged with the cell above it, add the trigger word to it.
				while ( isset( $rowspans[ $row_idx ][ $column_idx ] ) ) {
					$new_row[] = $rowspans[ $row_idx ][ $column_idx ];
					++$column_idx;
				}

				$new_row[] = self::_get_cell_content( $cell );

				// Read the spans from the cell's attributes, so that text inside the cell can not be mistaken for them, and so that empty cells are handled as well.
				$colspan = self::_get_cell_span( $cell, 'colspan' );
				$rowspan = self::_get_cell_span( $cell, 'rowspan' );

				// Add cells with the colspan trigger word, if merged cells across columns were found.
				for ( $i = 1; $i < $colspan; $i++ ) {
					$new_row[] = '#colspan#';
				}

				// If merged cells across rows were found, add trigger words to a temporary variable.
				for ( $i = 1; $i < $rowspan; $i++ ) {
					if ( ! isset( $rowspans[ $row_idx + $i ] ) ) {
						$rowspans[ $row_idx + $i ] = array();
					}
					$rowspans[ $row_idx + $i ][ $column_idx ] = '#rowspan#';
					for ( $j = 1; $j < $colspan; $j++ ) {
						$rowspans[ $row_idx + $i ][ $column_idx + $j ] = '#span#';
					}
				}

				// Skip all columns that the cell covers, so that the following cells and rowspans are registered at the correct column.
				$column_idx += $colspan;
			}

			// After the last cell in a row: If a cell in a row should be merged with the cell above it, add the trigger word to it.
			while ( isset( $rowspans[ $row_idx ][ $column_idx ] ) ) {
				$new_row[] = $rowspans[ $row_idx ][ $column_idx ];
				++$column_idx;
			}

			$rows[] = $new_row;
			++$row_idx;
		}

		// After the last data row: If all cells in a row should be merged with the cells in the row above, add the trigger word to each of them (should be very rare).
		while ( isset( $rowspans[ $row_idx ] ) && count( $rowspans[ $row_idx ] ) === count( $rows[ $row_idx - 1 ] ) ) { // phpcs:ignore Squiz.PHP.DisallowSizeFunctionsInLoops.Found
			ksort( $rowspans[ $row_idx ] );
			$rows[] = array_values( $rowspans[ $row_idx ] );
			++$row_idx;
		}

		return $rows;
	}

	/**
	 * Extracts the content between <td>...</td>, or <th>...</th>, possibly with HTML, from a table cell.
	 *
	 * @param SimpleXMLElement $cell Table cell element.
	 * @return string Content of the cell, or an empty string if no content could be extracted (e.g. for empty cells).
	 */
	private static function _get_cell_content( SimpleXMLElement $cell ): string {
		$cell_xml = $cell->asXML();
		if ( false === $cell_xml || 1 !== preg_match( '#<t[dh].*?>(.*)</t[dh]>#is', $cell_xml, $matches ) ) {
			return '';
		}

		/*
		 * Decode HTML entities again, as there might be some left especially in attributes of HTML tags in the cells,
		 * see https://www.php.net/manual/en/simplexmlelement.asxml.php#107137.
		 */
		return html_entity_decode( $matches[1], ENT_NOQUOTES, 'UTF-8' );
	}

	/**
	 * Reads the value of a `colspan` or `rowspan` attribute of a table cell.
	 *
	 * @param SimpleXMLElement $cell      Table cell element.
	 * @param string           $attribute Name of the attribute, `colspan` or `rowspan`.
	 * @return int Number of spanned columns/rows. 1 if the attribute is missing, does not start with digits, or is smaller than 1.
	 */
	private static function _get_cell_span( SimpleXMLElement $cell, string $attribute ): int {
		if ( ! isset( $cell[ $attribute ] ) ) {
			return 1;
		}

		// Only accept leading digits (after optional whitespace), so that values like "1e3" or "-2" are not interpreted by PHP's integer casting.
		if ( 1 !== preg_match( '/^\s*(\d+)/', (string) $cell[ $attribute ], $matches ) ) {
			return 1;
		}

		return max( 1, (int) $matches[1] );
	}

} // class HTML_Parser