Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@ import za.co.absa.cobrix.cobol.parser.CopybookParser.CopybookAST
import za.co.absa.cobrix.cobol.parser.ast.datatype.AlphaNumeric
import za.co.absa.cobrix.cobol.parser.ast.{Group, Primitive, Statement}
import za.co.absa.cobrix.cobol.parser.decoders.StringDecoders
import za.co.absa.cobrix.cobol.parser.decoders.StringDecoders.KeepAll
import za.co.absa.cobrix.cobol.parser.encoding._
import za.co.absa.cobrix.cobol.parser.policies.DebugFieldsPolicy
import za.co.absa.cobrix.cobol.parser.policies.DebugFieldsPolicy.DebugFieldsPolicy
import za.co.absa.cobrix.cobol.utils.StringUtils

import scala.collection.mutable.ArrayBuffer

Expand All @@ -47,7 +47,7 @@ class DebugFieldsAdder(debugFieldsPolicy: DebugFieldsPolicy) extends AstTransfor
}

val debugDecoder = debugFieldsPolicy match {
case DebugFieldsPolicy.HexValue => StringDecoders.decodeHex _
case DebugFieldsPolicy.HexValue => StringUtils.convertArrayToHex _
case DebugFieldsPolicy.RawValue => StringDecoders.decodeRaw _
case DebugFieldsPolicy.StringValue => (a: Array[Byte]) => new String(a)
case _ => throw new IllegalStateException(s"Unexpected debug fields policy: $debugFieldsPolicy.")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import za.co.absa.cobrix.cobol.parser.decoders.FloatingPointFormat.FloatingPoint
import za.co.absa.cobrix.cobol.parser.encoding._
import za.co.absa.cobrix.cobol.parser.encoding.codepage.{CodePage, CodePageCommon}
import za.co.absa.cobrix.cobol.parser.position.Position
import za.co.absa.cobrix.cobol.utils.StringUtils

import java.nio.charset.{Charset, StandardCharsets}
import scala.util.control.NonFatal
Expand Down Expand Up @@ -94,7 +95,7 @@ object DecoderSelector {
case UTF16 =>
StringDecoders.decodeUtf16String(_, getStringStrimmingType(stringTrimmingPolicy), isUtf16BigEndian, improvedNullDetection)
case HEX =>
StringDecoders.decodeHex
StringUtils.convertArrayToHex
case RAW =>
StringDecoders.decodeRaw
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,6 @@ object StringDecoders {
val TrimBoth = 4
val KeepAll = 5

// Characters used for HEX conversion
private val HEX_ARRAY = "0123456789ABCDEF".toCharArray

/**
* A decoder for any EBCDIC string fields (alphabetical or any char)
*
Expand Down Expand Up @@ -125,24 +122,6 @@ object StringDecoders {
}
}

/**
* A decoder for representing bytes as hex strings
*
* @param bytes A byte array that represents the binary data
* @return A HEX string representation of the binary data
*/
final def decodeHex(bytes: Array[Byte]): String = {
  // Every input byte yields exactly two hex digits, so the output size is known up front.
  val hex = new java.lang.StringBuilder(bytes.length * 2)
  var pos = 0
  while (pos < bytes.length) {
    // Mask to 0..255 so that negative bytes produce valid indexes into HEX_ARRAY.
    val unsigned = bytes(pos) & 0xFF
    hex.append(HEX_ARRAY(unsigned >>> 4)) // high nibble
    hex.append(HEX_ARRAY(unsigned & 0x0F)) // low nibble
    pos += 1
  }
  hex.toString
}

/**
* A decoder that doesn't decode, but just passes the bytes the way they are.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import za.co.absa.cobrix.cobol.parser.ast.Primitive
import za.co.absa.cobrix.cobol.reader.iterator.RecordLengthExpression
import za.co.absa.cobrix.cobol.reader.parameters.ReaderParameters
import za.co.absa.cobrix.cobol.reader.validator.ReaderParametersValidator
import za.co.absa.cobrix.cobol.utils.StringUtils

import scala.util.Try

Expand Down Expand Up @@ -127,7 +128,7 @@ class FixedWithRecordLengthExprRawRecordExtractor(ctx: RawRecordContext,
case l: Long => l.toInt
case s: String => Try{ s.toInt }.getOrElse(throw new IllegalStateException(s"Record length value of the field ${lengthAST.name} must be an integral type, encountered: '$s'."))
case d: BigDecimal => d.toInt
case null => throw new IllegalStateException(s"Null encountered as a record length field (offset: $byteIndex, raw value: ${getBytesAsHexString(binaryDataStart)}).")
case null => throw new IllegalStateException(s"Null encountered as a record length field (offset: $byteIndex, raw value: ${StringUtils.convertArrayToHex(binaryDataStart)}).")
case _ => throw new IllegalStateException(s"Record length value of the field ${lengthAST.name} must be an integral type.")
}
} else {
Expand All @@ -136,7 +137,7 @@ class FixedWithRecordLengthExprRawRecordExtractor(ctx: RawRecordContext,
case l: Long => getRecordLengthFromMapping(l.toString)
case d: BigDecimal => getRecordLengthFromMapping(d.toString())
case s: String => getRecordLengthFromMapping(s)
case null => defaultRecordLength.getOrElse(throw new IllegalStateException(s"Null encountered as a record length field (offset: $byteIndex, raw value: ${getBytesAsHexString(binaryDataStart)})."))
case null => defaultRecordLength.getOrElse(throw new IllegalStateException(s"Null encountered as a record length field (offset: $byteIndex, raw value: ${StringUtils.convertArrayToHex(binaryDataStart)})."))
case _ => throw new IllegalStateException(s"Record length value of the field ${lengthAST.name} must be an integral type.")
}
}
Expand All @@ -150,10 +151,6 @@ class FixedWithRecordLengthExprRawRecordExtractor(ctx: RawRecordContext,
}
}

final private def getBytesAsHexString(bytes: Array[Byte]): String = {
  // Renders each byte as two upper-case hex digits and concatenates them without separators.
  val hex = new StringBuilder(bytes.length * 2)
  for (b <- bytes) {
    hex.append("%02X".format(b))
  }
  hex.toString
}

private def fetchRecordUsingRecordLengthFieldExpression(expr: RecordLengthExpression): Option[Array[Byte]] = {
val lengthFieldBlock = expr.requiredBytesToread
val evaluator = expr.evaluator
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import za.co.absa.cobrix.cobol.parser.common.Constants
import za.co.absa.cobrix.cobol.parser.encoding.RAW
import za.co.absa.cobrix.cobol.reader.policies.SchemaRetentionPolicy
import za.co.absa.cobrix.cobol.reader.policies.SchemaRetentionPolicy.SchemaRetentionPolicy
import za.co.absa.cobrix.cobol.utils.StringUtils

import scala.collection.mutable
import scala.collection.mutable.{ArrayBuffer, ListBuffer}
Expand All @@ -35,21 +36,22 @@ object RecordExtractors {
/**
* This method extracts a record from the specified array of bytes. The copybook for the record needs to be already parsed.
*
* @param ast The parsed copybook.
* @param data The data bits containing the record.
* @param offsetBytes The offset to the beginning of the record (in bits).
* @param policy A schema retention policy to be applied to the extracted record.
* @param variableLengthOccurs If true, OCCURS DEPENDING ON data size will depend on the number of elements.
* @param generateRecordId If true, a record id field will be added as the first field of the record.
* @param generateRecordBytes If true, a record bytes field will be added at the beginning of each record.
* @param generateCorruptFields If true, a corrupt fields field will be appended to the end of the schema.
* @param segmentLevelIds Segment ids to put to the extracted record if id generation is turned on.
* @param fileId A file id to be put to the extractor record if generateRecordId == true.
* @param recordId The record id to be saved to the record id field.
* @param activeSegmentRedefine An active segment redefine (the one that will be parsed).
* All other segment redefines will be skipped.
* @param generateInputFileField if true, a field containing input file name will be generated
* @param inputFileName An input file name to put if its generation is needed
* @param ast The parsed copybook.
* @param data The data bits containing the record.
* @param offsetBytes The offset to the beginning of the record (in bits).
* @param policy A schema retention policy to be applied to the extracted record.
* @param variableLengthOccurs If true, OCCURS DEPENDING ON data size will depend on the number of elements.
* @param generateRecordId If true, a record id field will be added as the first field of the record.
* @param generateRecordBytes If true, a record bytes field will be added at the beginning of each record.
* @param generateCorruptFields If true, a corrupt fields field will be appended to the end of the schema.
* @param generateCorruptFieldsAsHex If true, corrupt fields will be generated as hex strings, otherwise they will be generated as binary data. This parameter is only relevant if generateCorruptFields is true.
* @param segmentLevelIds Segment ids to put to the extracted record if id generation is turned on.
* @param fileId A file id to be put to the extractor record if generateRecordId == true.
* @param recordId The record id to be saved to the record id field.
* @param activeSegmentRedefine An active segment redefine (the one that will be parsed).
* All other segment redefines will be skipped.
* @param generateInputFileField if true, a field containing input file name will be generated
* @param inputFileName An input file name to put if its generation is needed
* @return An Array[Any] object corresponding to the record schema.
*/
@throws(classOf[IllegalStateException])
Expand All @@ -62,6 +64,7 @@ object RecordExtractors {
generateRecordId: Boolean = false,
generateRecordBytes: Boolean = false,
generateCorruptFields: Boolean = false,
generateCorruptFieldsAsHex: Boolean = false,
segmentLevelIds: List[String] = Nil,
fileId: Int = 0,
recordId: Long = 0,
Expand Down Expand Up @@ -213,7 +216,7 @@ object RecordExtractors {
policy
}

applyRecordPostProcessing(ast, records.toList, effectiveSchemaRetentionPolicy, generateRecordId, generateRecordBytes, generateCorruptFields, segmentLevelIds, fileId, recordId, data.length, data, generateInputFileField, inputFileName, corruptFields, handler)
applyRecordPostProcessing(ast, records.toList, effectiveSchemaRetentionPolicy, generateRecordId, generateRecordBytes, generateCorruptFields, generateCorruptFieldsAsHex, segmentLevelIds, fileId, recordId, data.length, data, generateInputFileField, inputFileName, corruptFields, handler)
}

/**
Expand Down Expand Up @@ -433,7 +436,7 @@ object RecordExtractors {
policy
}

applyRecordPostProcessing(ast, records.toList, effectiveSchemaRetentionPolicy, generateRecordId, generateRecordBytes = false, generateCorruptFields = false, Nil, fileId, recordId, recordLength, Array.empty[Byte], generateInputFileField = generateInputFileField, inputFileName, null, handler)
applyRecordPostProcessing(ast, records.toList, effectiveSchemaRetentionPolicy, generateRecordId, generateRecordBytes = false, generateCorruptFields = false, generateCorruptFieldsAsHex = false, Nil, fileId, recordId, recordLength, Array.empty[Byte], generateInputFileField = generateInputFileField, inputFileName, null, handler)
}

/**
Expand All @@ -449,16 +452,18 @@ object RecordExtractors {
* Combinations of the listed transformations are supported.
* </p>
*
* @param ast The parsed copybook
* @param records The array of [[T]] object for each Group of the copybook
* @param generateRecordId If true a record id field will be added as the first field of the record.
* @param generateRecordBytes If true a record bytes field will be added at the beginning of the record.
* @param generateCorruptFields If true, a corrupt fields field will be appended to the end of the schema.
* @param fileId The file id to be saved to the file id field
* @param recordId The record id to be saved to the record id field
* @param recordByteLength The length of the record
* @param generateInputFileField if true, a field containing input file name will be generated
* @param inputFileName An input file name to put if its generation is needed
* @param ast The parsed copybook
* @param records The array of [[T]] object for each Group of the copybook
* @param generateRecordId If true a record id field will be added as the first field of the record.
* @param generateRecordBytes If true a record bytes field will be added at the beginning of the record.
* @param generateCorruptFields If true, a corrupt fields field will be appended to the end of the schema.
* @param generateCorruptFieldsAsHex If true, corrupt fields will be generated as hex strings, otherwise they will be generated as binary data. This parameter is only relevant if generateCorruptFields is true.
* @param segmentLevelIds Segment ids to put to the extracted record if id generation is turned on.
* @param fileId The file id to be saved to the file id field
* @param recordId The record id to be saved to the record id field
* @param recordByteLength The length of the record
* @param generateInputFileField if true, a field containing input file name will be generated
* @param inputFileName An input file name to put if its generation is needed
* @return A [[T]] object corresponding to the record schema
*/
private def applyRecordPostProcessing[T](
Expand All @@ -468,6 +473,7 @@ object RecordExtractors {
generateRecordId: Boolean,
generateRecordBytes: Boolean,
generateCorruptFields: Boolean,
generateCorruptFieldsAsHex: Boolean = false,
segmentLevelIds: List[String],
fileId: Int,
recordId: Long,
Expand Down Expand Up @@ -515,7 +521,12 @@ object RecordExtractors {
val ar = new Array[Any](len)
var i = 0
while (i < len) {
val r = handler.create(Array[Any](corruptFields(i).fieldName, corruptFields(i).rawValue), corruptFieldsGroup)
val r = if (generateCorruptFieldsAsHex) {
val hex = StringUtils.convertArrayToHex(corruptFields(i).rawValue)
handler.create(Array[Any](corruptFields(i).fieldName, hex), corruptFieldsGroup)
} else {
handler.create(Array[Any](corruptFields(i).fieldName, corruptFields(i).rawValue), corruptFieldsGroup)
}
ar(i) = r
i += 1
}
Expand All @@ -525,7 +536,7 @@ object RecordExtractors {
// toList() is a constant time operation, and List implements immutable Seq, which is exactly what is needed here.
outputRecords.toList
}

/**
* Constructs a Group object representing corrupt fields. It is only needed for constructing records that require field names,
* such as JSON. Field sizes and encoding do not really matter
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ package za.co.absa.cobrix.cobol.reader.iterator

import za.co.absa.cobrix.cobol.internal.Logging
import za.co.absa.cobrix.cobol.reader.extractors.record.{RecordExtractors, RecordHandler}
import za.co.absa.cobrix.cobol.reader.parameters.ReaderParameters
import za.co.absa.cobrix.cobol.reader.parameters.{CorruptFieldsPolicy, ReaderParameters}
import za.co.absa.cobrix.cobol.reader.schema.CobolSchema
import za.co.absa.cobrix.cobol.reader.validator.ReaderParametersValidator

Expand Down Expand Up @@ -47,6 +47,8 @@ class FixedLenNestedRowIterator[T: ClassTag](
private val segmentIdFilter = readerProperties.multisegment.flatMap(_.segmentIdFilter)
private val segmentRedefineMap = readerProperties.multisegment.map(_.segmentIdRedefineMap).getOrElse(HashMap[String, String]())
private val segmentRedefineAvailable = segmentRedefineMap.nonEmpty
// True for every corrupt fields policy except Disabled.
private val generateCorruptFields = readerProperties.corruptFieldsPolicy != CorruptFieldsPolicy.Disabled
// True only when the corrupt fields policy is Hex.
private val generateCorruptFieldsAsHex = readerProperties.corruptFieldsPolicy == CorruptFieldsPolicy.Hex

override def hasNext: Boolean = {
val correctOffset = if (singleRecordOnly) {
Expand Down Expand Up @@ -90,7 +92,8 @@ class FixedLenNestedRowIterator[T: ClassTag](
readerProperties.schemaPolicy,
readerProperties.variableSizeOccurs,
generateRecordBytes = readerProperties.generateRecordBytes,
generateCorruptFields = readerProperties.generateCorruptFields,
generateCorruptFields = generateCorruptFields,
generateCorruptFieldsAsHex = generateCorruptFieldsAsHex,
activeSegmentRedefine = activeSegmentRedefine,
handler = handler
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ import za.co.absa.cobrix.cobol.parser.Copybook
import za.co.absa.cobrix.cobol.parser.headerparsers.RecordHeaderParser
import za.co.absa.cobrix.cobol.reader.extractors.raw.RawRecordExtractor
import za.co.absa.cobrix.cobol.reader.extractors.record.{RecordExtractors, RecordHandler}
import za.co.absa.cobrix.cobol.reader.parameters.ReaderParameters
import za.co.absa.cobrix.cobol.reader.parameters.{CorruptFieldsPolicy, ReaderParameters}
import za.co.absa.cobrix.cobol.reader.stream.SimpleStream

import scala.collection.immutable.HashMap
Expand Down Expand Up @@ -60,6 +60,8 @@ final class VarLenNestedIterator[T: ClassTag](cobolSchema: Copybook,
private val segmentRedefineMap = readerProperties.multisegment.map(_.segmentIdRedefineMap).getOrElse(HashMap[String, String]())
private val segmentRedefineAvailable = segmentRedefineMap.nonEmpty
private val generateInputFileName = readerProperties.inputFileNameColumn.nonEmpty
// True for every corrupt fields policy except Disabled.
private val generateCorruptFields = readerProperties.corruptFieldsPolicy != CorruptFieldsPolicy.Disabled
// True only when the corrupt fields policy is Hex.
private val generateCorruptFieldsAsHex = readerProperties.corruptFieldsPolicy == CorruptFieldsPolicy.Hex

fetchNext()

Expand Down Expand Up @@ -99,7 +101,8 @@ final class VarLenNestedIterator[T: ClassTag](cobolSchema: Copybook,
readerProperties.variableSizeOccurs,
readerProperties.generateRecordId,
readerProperties.generateRecordBytes,
readerProperties.generateCorruptFields,
generateCorruptFields,
generateCorruptFieldsAsHex,
segmentLevelIds,
fileId,
rawRecordIterator.getRecordIndex,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,16 @@ object CobolParametersParser extends Logging {
else
None

// Derive the corrupt fields policy from the two parsed flags:
// generation switched off -> Disabled; otherwise Hex when binary values are decoded as hex, else Binary.
val corruptFieldsPolicy =
  if (!parameters.generateCorruptFields) {
    CorruptFieldsPolicy.Disabled
  } else if (parameters.decodeBinaryAsHex) {
    CorruptFieldsPolicy.Hex
  } else {
    CorruptFieldsPolicy.Binary
  }

ReaderParameters(
recordFormat = parameters.recordFormat,
isEbcdic = parameters.isEbcdic,
Expand Down Expand Up @@ -433,7 +443,7 @@ object CobolParametersParser extends Logging {
fileEndOffset = varLenParams.fileEndOffset,
generateRecordId = varLenParams.generateRecordId,
generateRecordBytes = parameters.generateRecordBytes,
generateCorruptFields = parameters.generateCorruptFields,
corruptFieldsPolicy = corruptFieldsPolicy,
schemaPolicy = parameters.schemaRetentionPolicy,
stringTrimmingPolicy = parameters.stringTrimmingPolicy,
isDisplayAlwaysString = parameters.isDisplayAlwaysString,
Expand Down
Loading