#include-once
#include <Array.au3>

; #FUNCTION# ======================================================================================
; Name ..........: _parseCSV()
; Description ...: converts a csv string or file into an array with the ability of user-defined parsing of the values
; Syntax ........: _parseCSV($sInput, [$bHeader = False, [$bSkipHeader = False, [$cSep = ';', [$cQuote = '"', [$sValParseDef = Default, [$bTrimWS = True]]]]]])
; Parameters ....: $sInput        - a csv-formatted string, a file name/path or a FileRead-Handle to a csv-formatted file
;                  $bHeader       - True: first record = Header
;                                   False = no header record
;                  $bSkipHeader   - True: the first record (header) is skipped and is NOT part of the return array
;                                   False: the first record is part of the return array
;                  $cSep          - single(!) character for the value separator (common: ";", "," or "|")
;                  $cQuote        - single(!) character for the quotation character (common: '"') - can be escaped inside a quoted value by doubling it
;                  $sValParseDef  - semicolon-separated string list of function names which are applied to each value in their column
;                                       this is to process or parse the values inside the columns.
;                                       an empty entry means "leave as is" - so you can skip columns
;                                       example: ";Number;;_userdefFunc;StringUpper"
;                                   Default: treat each value as a string
;                                   "Auto": convert values that represent numbers in AutoIt syntax to number types
;                  $bTrimWS       - True: Whitespace before and after the value are not treated as a part of them
;                                 - False: Whitespaces before and after the value are treated as a part of them
; Return values .: Success: an array with the formatted contents of the CSV-input
;                  Failure: False
;                     @error = 1: error when reading from the specified file in $sInput (@extended = @error of FileRead)
;                     @error = 2: invalid data type of $sInput
;                     @error = 3: no possibility to derive data records from $sInput
;                     @error = 4: no data records available
;                     @error = 5: error in determining the number of columns
;                     @error = 6: error while processing $sValParseDef
;                     @error = 7: invalid user-defined parsing-function specified (@extended = column number where the error occurs)
;                     @error = 8: error while executing the user-defined parsing-function (@extended = @error of user defined parsing-function)
;                     @error = 9: seperator ($cSep) is not a single char
;                     @error = 10: quote ($cQuote) is not a single char
; Author ........: AspirinJunkie
; Last changed ..: 2020-07-31
; Version .......: 0.8
; =================================================================================================
Func _parseCSV($sInput, $bHeader = False, $bSkipHeader = False, $cSep = ';', $cQuote = '"', $sValParseDef = Default, $bTrimWS = True)
	; Parses CSV text (given as string, file path or read handle) into a 2D array [record][column].
	; Returns False and sets @error (1..10, see the function header) on failure.

	; Both delimiters are spliced into the regex patterns below between \Q...\E,
	; so each must be exactly one character (an empty string would corrupt the patterns).
	If StringLen($cSep) <> 1 Then Return SetError(9, StringLen($cSep), False)
	If StringLen($cQuote) <> 1 Then Return SetError(10, StringLen($cQuote), False)

	; $patCSV: named sub-patterns (DEFINE block) used to split the input into records;
	; a record may span several lines when a line break sits inside a quoted value.
	Local Const $patCSV = "(?mx)(?(DEFINE)" & @LF & _
			"(?'VALUNQUOTED' [^\Q" & $cSep & "\E\v\Q" & $cQuote & "\E]*+)" & @LF & _
			"(?'VALQUOTED'  \Q" & $cQuote & "\E(?>[^\Q" & $cQuote & "\E]++|\Q" & $cQuote & $cQuote & "\E)*+\Q" & $cQuote & "\E)" & @LF & _
			"(?'VALUE'\h*\K\g<VALQUOTED>\h* | (?<=\Q" & $cSep & "\E|^)\g<VALUNQUOTED>)" & @LF & _
			"(?'RECORD' ^(?> \g<VALQUOTED> | [^\Q" & $cQuote & "\E\v]*)*+ $ )" & @LF & _
		")"
	; $patValues: matches one value inside a record - quoted value | unquoted value | empty value
	Local Const $patValues = $bTrimWS ? "(?|\h*\Q" & $cQuote & "\E((?>[^\Q" & $cQuote & "\E]++|\Q" & $cQuote & $cQuote & "\E)*+)\Q" & $cQuote & "\E\h*|\h*([^\Q" & $cSep & "\E\v]+)|(?<=\Q" & $cSep & "\E|\A).{0})" _
							: "(?|\h*\Q" & $cQuote & "\E((?>[^\Q" & $cQuote & "\E]++|\Q" & $cQuote & $cQuote & "\E)*+)\Q" & $cQuote & "\E\h*|[^\Q" & $cSep & "\E\v]+|(?<=\Q" & $cSep & "\E|\A).{0})"

	; "Auto" converts number-like values with Number(); no value is ever evaluated as code
	Local Const $bAutoParse = $sValParseDef = "Auto"

	; read file if $sInput = file name or file handle
	If FileExists($sInput) Or IsInt($sInput) Then $sInput = FileRead($sInput)
	If @error Then Return SetError(1, @error, False)

	If Not IsString($sInput) Then Return SetError(2, 0, False)

	; delete trailing empty lines
	$sInput = StringRegExpReplace($sInput, '\s+\Z', '')

	; convert line ends to \n (@LF) only
	$sInput = StringRegExpReplace($sInput, '\r\n|\r', @LF)

	; split the input into records
	Local $aRecords = StringRegExp($sInput, $patCSV & '(?&RECORD)', 3)
	If @error Then Return SetError(3, @error, False)
	Local $iFirst = $bSkipHeader ? 1 : 0 ; index of the first record that goes into the result
	Local $nRecords = UBound($aRecords) - $iFirst
	If $nRecords < 1 Then Return SetError(4, $nRecords, False)

	; determine the number of (initial) columns from the first record:
	; the replacement count in @extended equals the number of values found
	StringRegExpReplace($aRecords[0], $patValues, '')
	If @error Then Return SetError(5, @error, False)
	Local $nCols = @extended

	; define the return array
	Local $aRet[$nRecords][$nCols]

	; prepare the parsing definition string
	Local $bUserParsing = $sValParseDef <> Default And $sValParseDef <> "Auto"
	If $bUserParsing Then
		Local $aParsing = StringSplit($sValParseDef, ';', 3)
		If @error Then Return SetError(6, @error, False)
		; check if the named functions really exist
		For $iI = 0 To UBound($aParsing) - 1
			$aParsing[$iI] = StringStripWS($aParsing[$iI], 8)
			If $aParsing[$iI] = "" Then ContinueLoop ; "" means: leave the column as is
			If Not IsFunc(Execute($aParsing[$iI])) Then Return SetError(7, $iI + 1, False)
		Next
		ReDim $aParsing[$nCols]
	EndIf

	; iterate over all records
	Local $sVal, $iJ
	For $iI = $iFirst To UBound($aRecords) - 1
		$iJ = 0

		; iterate over all values in the current record
		For $sVal In StringRegExp($aRecords[$iI], $patValues, 3)
			; Grow the result if this record has more values than any record before.
			; $nCols must track the new width - otherwise a later, shorter record would
			; ReDim the array smaller again and silently drop already stored values.
			If $iJ >= $nCols Then
				$nCols = $iJ + 1
				ReDim $aRet[UBound($aRet)][$nCols]
				If $bUserParsing Then ReDim $aParsing[$nCols]
			EndIf

			; remove trailing whitespaces if set (leading ones are consumed by the pattern)
			If $bTrimWS Then $sVal = StringStripWS($sVal, 2)

			; unescape doubled quotes
			$sVal = StringReplace($sVal, $cQuote & $cQuote, $cQuote, 0, 1)

			; parse the string-type values if parameter is set
			If $bAutoParse Then
				; hex (0x + hex digits), integer, float or scientific notation
				If StringRegExp($sVal, '(?i)\A(?|0x[[:xdigit:]]+|[-+]?(?>\d+)(?>\.\d+)?(?:e[-+]?\d+)?)\Z') Then $sVal = Number($sVal)
			ElseIf $bUserParsing Then
				; the header record is never passed through the user-defined functions
				If $aParsing[$iJ] <> "" And Not ($bHeader And $iI = 0) Then $sVal = Call($aParsing[$iJ], $sVal)
				If @error Then Return SetError(8, @error, False)
			EndIf

			; add current value to return array
			$aRet[$iI - $iFirst][$iJ] = $sVal
			$iJ += 1
		Next
	Next

	Return $aRet
EndFunc   ;==>_parseCSV


; #FUNCTION# ======================================================================================
; Name ..........: _Array2Dics()
; Description ...: converts a 2D-Array (rows=records, columns=values) into a set of objects (every record = Dictionary with named attributes)
; Syntax ........: _Array2Dics(ByRef $aArray, [$sHeader = Default, [$bHeader = True]])
; Parameters ....: $aArray        - the input array
;                  $sHeader       - Default: the header elements (attribute names) are taken from the first row
;                                   String: semicolon-separated entries for the header (= attribute names), must have at least as many elements as there are columns
;                  $bHeader       - True: header exists in the first row; if $sHeader is a string, this row is skipped and $sHeader is used instead
;                                   False: no header row exists - $sHeader must be a string
; Return values .: Success:       1D-array with record-objects of type Scripting.Dictionary
;                  Failure: False
;                     @error = 1: no attribute names given because $sHeader = Default and $bHeader = False (no header in first row)
;                     @error = 2: $aArray is not a 2D-Array (@extended = num of dimensions of $aArray)
;                     @error = 3: error when processing $sHeader
;                     @error = 4: Less unique attribute names given than attributes themselves exists (n(header elements) < n(columns))
; Author ........: AspirinJunkie
; Last changed ..: 2020-06-31
; Version .......: 0.5
; =================================================================================================
Func _Array2Dics(ByRef $aArray, $sHeader = Default, $bHeader = True)
	; Converts a 2D array into a 1D array of Scripting.Dictionary objects (one per record).
	; Keys are the attribute names from $sHeader or, if $sHeader = Default, from the first row.
	; Failure: returns False, @error = 1..4 as described in the function header,
	;          @error = 5 if the Scripting.Dictionary object could not be created.
	If $sHeader = Default And $bHeader = False Then Return SetError(1, 0, False)
	; UBound(..., 0) is the number of dimensions - report that (not the row count) in @extended
	If UBound($aArray, 0) <> 2 Then Return SetError(2, UBound($aArray, 0), False)

	; prepare the header values
	If $sHeader <> Default Then
		; split at ';' - quoted elements may contain ';' and doubled quotes
		Local $aHeader = StringRegExp($sHeader, '\h*\K("(?>[^"]+|"")*"|[^";]++)\h*', 3)
		If @error Then Return SetError(3, @error, False)
	Else
		$bHeader = True ; names come from the first row, so this row must be skipped below

		Local $aHeader[UBound($aArray, 2)]
		For $iI = 0 To UBound($aHeader) - 1
			$aHeader[$iI] = $aArray[0][$iI]
		Next
	EndIf

	; Strip the surrounding quotes of fully quoted header elements and unescape doubled quotes.
	; (The pattern must not contain stray blanks: without the (?x) flag they are literal
	;  characters and would prevent a match on plain quoted names such as "abc".)
	For $iI = 0 To UBound($aHeader) - 1
		If StringRegExp($aHeader[$iI], '^\h*"(?>[^"]+|"")*"\h*$') Then
			$aHeader[$iI] = StringReplace(StringRegExpReplace($aHeader[$iI], '(?s)^\h*"(.*)"\h*$', "\1"), '""', '"', 0, 1)
		EndIf
	Next

	; attribute names must be unique (case-sensitive) and cover every column
	$aHeader = _ArrayUnique($aHeader, 0, 0, 1, 0)
	If UBound($aHeader) < UBound($aArray, 2) Then Return SetError(4, UBound($aHeader), False)

	; prepare return array
	Local $iFirst = $bHeader ? 1 : 0 ; index of the first data row
	Local $aRet[UBound($aArray) - $iFirst]

	Local $oDic
	For $iI = $iFirst To UBound($aArray) - 1
		$oDic = ObjCreate("Scripting.Dictionary")
		If Not IsObj($oDic) Then Return SetError(5, 0, False)
		For $iJ = 0 To UBound($aArray, 2) - 1
			$oDic($aHeader[$iJ]) = $aArray[$iI][$iJ]
		Next
		$aRet[$iI - $iFirst] = $oDic
	Next

	Return $aRet
EndFunc   ;==>_Array2Dics


; #FUNCTION# ======================================================================================
; Name ..........: _Array2CSV()
; Description ...: converts a 2D-Array (rows=records, columns=values) into a csv-style string
; Syntax ........: _Array2CSV(ByRef $aArray, [$sHeader = Default, [$cSep = ';', [$cQuote = '', [$sLB = @CRLF]]]])
; Parameters ....: $aArray        - the input array
;                  $sHeader       - Default: no external header is given
;                                   String: string to add as the first line of return string
;                                   1D-Array: header elements
;                  $cSep          - character for the value seperator (common: ";", "," or "|")
;                  $cQuote        - character for the quotation character (common: '"') - if already inside value than escaped through doubling
;                  $sLB           - String which is used for line-breaks (common: @CRLF)
; Return values .: Success:       csv-formatted string
;                  Failure: False
;                     @error = 1: $aArray is not a 2D-Array (@extended = num of dimensions of $aArray)
;                     @error = 2: input array for $sHeader has not array dimension 1
; Author ........: AspirinJunkie
; Last changed ..: 2020-07-31
; Version .......: 0.8
; =================================================================================================
Func _Array2CSV(ByRef $aArray, $sHeader = Default, $cSep = ';', $cQuote = '', $sLB = @CRLF)
	; Serializes a 2D array into a CSV string. Records are joined with $sLB, values with $cSep.
	; Failure: returns False, @error = 1 ($aArray not 2D) or 2 (header array not 1D);
	;          @extended = number of dimensions of the offending array.
	If UBound($aArray, 0) <> 2 Then Return SetError(1, UBound($aArray, 0), False)

	Local $sRet = "", $sV

	; a value needs quoting if it contains a line break, the separator or the quote character
	; (\R must stay outside the character class - inside [...] it would only match a literal "R")
	Local Const $patNeedsQuote = '\R|[\Q' & $cSep & $cQuote & '\E]'

	; add header if given
	If IsString($sHeader) Then
		$sRet &= $sHeader & $sLB
	ElseIf IsArray($sHeader) Then
		; check the number of dimensions (UBound without 2nd parameter would be the element count)
		If UBound($sHeader, 0) <> 1 Then Return SetError(2, UBound($sHeader, 0), False)
		For $sV In $sHeader
			If $cQuote <> "" Then
				; with an explicit quote character every header element gets quoted
				$sV = $cQuote & StringReplace($sV, $cQuote, $cQuote & $cQuote, 0, 1) & $cQuote
			ElseIf StringRegExp($sV, $patNeedsQuote) Then
				; no quote character given but quoting is unavoidable: fall back to '"'
				$sV = '"' & StringReplace($sV, '"', '""', 0, 1) & '"'
			EndIf
			$sRet &= $sV & $cSep
		Next
		$sRet = StringTrimRight($sRet, StringLen($cSep)) & $sLB
	EndIf

	; add values
	For $iR = 0 To UBound($aArray) - 1
		For $iC = 0 To UBound($aArray, 2) - 1
			$sV = $aArray[$iR][$iC]
			If StringRegExp($sV, $patNeedsQuote) Then
				$sV = $cQuote = "" _
						 ? '"' & StringReplace($sV, '"', '""', 0, 1) & '"' _
						 : $cQuote & StringReplace($sV, $cQuote, $cQuote & $cQuote, 0, 1) & $cQuote
			EndIf
			$sRet &= $sV & $cSep
		Next
		; replace the trailing separator with the line break
		$sRet = StringTrimRight($sRet, StringLen($cSep)) & $sLB
	Next

	; drop the line break after the last record
	Return StringTrimRight($sRet, StringLen($sLB))

EndFunc   ;==>_Array2CSV



; #FUNCTION# ======================================================================================
; Name ..........: _StringHandleTable()
; Description ...: parse table-like strings with fixed column widths and
;                  parse or process their content into an array
; Syntax ........: _StringHandleTable(ByRef Const $s_String, Const $s_Format)
; Parameters ....: $s_String     - the table-like string to parse
;                  $s_Format     - dllcall-like syntax where columns separated by ";":
;                                 COLUMNTYPE XX; COLUMNTYPE X; COLUMNTYPE XXX...
;                        Where - COLUMNTYPE: Any autoit-functionname to process - normally e.g.
;                                            Int(), Float(), String(), or any other function
;                                            additionaly "leftstring" and "rightstring" for aligned strings
;                                            if "void" - colum will be ignored
;                                XX:         column-width as number of chars (can be left out in the last column)
; Return values .: Success: returns a array
;                  Failure: set @error and returns a debug-string
; Example .......:
;                  #include <Array.au3>
;
;                  $s_String = "  1 sinnlos links           rechts  13,00€  " & @CRLF & _
;                          " 10 sinnlos irgendwas    irgendwas  12,00€" & @CRLF & _
;                          @CRLF & _
;                          " 20 sinnlos blabla          blabla  16,30€"
;
;                  ConsoleWrite($s_String & @CRLF)
;
;                  $a_Data = _StringHandleTable($s_String, "int 4; void 8; leftString 10; rightString 12; Euro 8")
;                  _ArrayDisplay($a_Data, "parsed")
;
;                  ; example how to parse currency-columns
;                  Func Euro(Const $s_String)
;                      Return Number(StringReplace($s_String, ",", ".", 0, 1))
;                  EndFunc   ;==>Euro
;
; Author ........: AspirinJunkie
; =================================================================================================
Func _StringHandleTable(ByRef Const $s_String, Const $s_Format)
	; Parses a fixed-width text table into a 2D array [line][column].
	; $s_Format: "TYPE WIDTH; TYPE WIDTH; ..." - TYPE is a function name applied to the cell,
	;            "leftString"/"rightString" (strip trailing/leading whitespace) or "void" (skip column).
	;            A missing WIDTH means "rest of the line".
	; Failure: @error = 1 and "" is returned if $s_Format contains no column definition.

	; create format array: each match = [full match, type, (width)]
	Local $a_Defs = StringRegExp($s_Format, '\s*(\w+)\s*(\d+)?', 4)
	If @error Then Return SetError(1, @error, "")
	Local $a_Cols[UBound($a_Defs)][3] ; [n][0] = type, [n][1] = start position (1-based), [n][2] = width

	Local $i_Pos = 1, $i_Used = 0, $i_Width, $a_Def
	For $a_Def In $a_Defs
		; width -1 makes StringMid() return everything up to the end of the line
		$i_Width = -1
		If UBound($a_Def) > 2 Then
			If $a_Def[2] <> "" Then $i_Width = Int($a_Def[2])
		EndIf

		If $a_Def[1] <> "void" Then
			$a_Cols[$i_Used][0] = $a_Def[1]
			$a_Cols[$i_Used][1] = $i_Pos
			$a_Cols[$i_Used][2] = $i_Width
			$i_Used += 1
		EndIf
		; void columns are not stored but still consume their width
		$i_Pos += $i_Width
	Next
	If $i_Used <> UBound($a_Cols) Then ReDim $a_Cols[$i_Used][3]

	; split the string into its non-empty lines
	; (only CR/LF are line separators - a "|" inside a line is ordinary data)
	Local $a_Lines = StringRegExp($s_String, "[^\r\n]+", 3)
	Local $a_Ret[UBound($a_Lines)][UBound($a_Cols)]
	Local $s_rawVal, $s_Val, $s_Line

	Local $i_Row = 0
	For $s_Line In $a_Lines
		For $i_Col = 0 To UBound($a_Cols) - 1
			$s_rawVal = StringMid($s_Line, $a_Cols[$i_Col][1], $a_Cols[$i_Col][2])
			If $a_Cols[$i_Col][0] = "leftString" Then
				$s_Val = StringStripWS($s_rawVal, 2)
			ElseIf $a_Cols[$i_Col][0] = "rightString" Then
				$s_Val = StringStripWS($s_rawVal, 1)
			Else
				; The cell is passed as a string literal to the column function. Double quotes inside
				; the cell are escaped by doubling so that table content can neither break the
				; expression nor inject code into Execute().
				$s_Val = Execute($a_Cols[$i_Col][0] & '("' & StringReplace($s_rawVal, '"', '""', 0, 1) & '")')
				If @error Then ContinueLoop ; unknown function etc.: leave the cell empty
			EndIf
			$a_Ret[$i_Row][$i_Col] = $s_Val
		Next
		$i_Row += 1
	Next
	Return $a_Ret
EndFunc   ;==>_StringHandleTable
