Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Regex support in valid key #200

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions Test.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
<?php
// This is a generated file! Please edit source .ksy file and use kaitai-struct-compiler to rebuild

namespace {
class Test extends \Kaitai\Struct\Struct {
    /**
     * Forwards the stream, parent and root to the parent constructor,
     * then parses the structure right away.
     */
    public function __construct(\Kaitai\Struct\Stream $_io, \Kaitai\Struct\Struct $_parent = null, \Test $_root = null) {
        parent::__construct($_io, $_parent, $_root);
        $this->_read();
    }

    /**
     * Reads two bytes, decodes them as UTF-8 into `name`, and throws a
     * ValidationRegexMatchError when preg_match reports no match.
     */
    private function _read() {
        $rawName = $this->_io->readBytes(2);
        $this->_m_name = \Kaitai\Struct\Stream::bytesToStr($rawName, "UTF-8");
        $isValid = preg_match("/\d\d/", $this->name());
        if (!$isValid) {
            throw new \Kaitai\Struct\Error\ValidationRegexMatchError("\\d\\d", $this->name(), $this->_io(), "/seq/0");
        }
    }

    /** Backing storage for the `name` field, filled in by _read(). */
    protected $_m_name;

    /** Returns the value stored for the `name` field. */
    public function name() { return $this->_m_name; }
}
}
9 changes: 9 additions & 0 deletions shared/src/main/scala/io/kaitai/struct/datatype/KSError.scala
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ object KSError {
case "ValidationLessThanError" => ValidationLessThanError
case "ValidationGreaterThanError" => ValidationGreaterThanError
case "ValidationNotAnyOfError" => ValidationNotAnyOfError
case "ValidationRegexMatchError" => ValidationRegexMatchError
}
excClass(dataType)
}
Expand Down Expand Up @@ -64,6 +65,14 @@ case class ValidationNotAnyOfError(_dt: DataType) extends ValidationError(_dt) {
def name = "ValidationNotAnyOfError"
}

/**
* Error to be thrown when validation fails because the actual value does not match the regular expression
* @param _dt data type used in validation process
*/
case class ValidationRegexMatchError(_dt: DataType) extends ValidationError(_dt) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For equality checks (and storing original value in question for C++), we need to know original value data type. But regular expression matches would likely only work for strings. What's the point in passing _dt: DataType here?

def name = "ValidationRegexMatchError"
}

/**
* Exception that is thrown when we can't decide on endianness
* and thus can't proceed with parsing.
Expand Down
1 change: 1 addition & 0 deletions shared/src/main/scala/io/kaitai/struct/exprlang/Ast.scala
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ object Ast {
case class IfExp(condition: expr, ifTrue: expr, ifFalse: expr) extends expr
// case class Dict(keys: Seq[expr], values: Seq[expr]) extends expr
case class Compare(left: expr, ops: cmpop, right: expr) extends expr
case class RegexMatch(str: expr, regex: String) extends expr
case class Call(func: expr, args: Seq[expr]) extends expr
case class IntNum(n: BigInt) extends expr
case class FloatNum(n: BigDecimal) extends expr
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ case class ValidationMax(max: Ast.expr) extends ValidationSpec
case class ValidationRange(min: Ast.expr, max: Ast.expr) extends ValidationSpec
case class ValidationAnyOf(values: List[Ast.expr]) extends ValidationSpec
case class ValidationExpr(checkExpr: Ast.expr) extends ValidationSpec
case class ValidationRegex(checkExpr: String) extends ValidationSpec

object ValidationEq {
val LEGAL_KEYS = Set("eq")
Expand Down Expand Up @@ -68,16 +69,37 @@ object ValidationExpr {
}
}

/**
 * Reads the `regex` form of a validation spec from a parsed YAML map.
 */
object ValidationRegex {
  val LEGAL_KEYS = Set("regex")

  /**
   * Builds a [[ValidationRegex]] when `ParseUtils.getOptValueStr` yields a
   * value for the `regex` key.
   *
   * @param src  map to inspect
   * @param path location of `src`, passed through to the ParseUtils helpers
   * @return the spec wrapped in `Some` when a `regex` value was found,
   *         otherwise whatever empty result the lookup produced
   */
  def fromMap(src: Map[String, Any], path: List[String]): Option[ValidationRegex] = {
    val maybePattern = ParseUtils.getOptValueStr(src, "regex", path)
    maybePattern.map { pattern =>
      // The key check runs only once a `regex` value is known to exist.
      ParseUtils.ensureLegalKeys(src, LEGAL_KEYS, path)
      ValidationRegex(pattern)
    }
  }
}


Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: unnecessary double newline.

Suggested change

object ValidationSpec {
val LEGAL_KEYS =
ValidationEq.LEGAL_KEYS ++
ValidationRange.LEGAL_KEYS ++
ValidationAnyOf.LEGAL_KEYS ++
ValidationExpr.LEGAL_KEYS
ValidationExpr.LEGAL_KEYS ++
ValidationRegex.LEGAL_KEYS

def fromYaml(src: Any, path: List[String]): ValidationSpec = {
src match {
case value: String =>

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: unnecessary newline

Suggested change

fromString(value, path)
case x: Boolean =>
fromString(x.toString, path)
Expand Down Expand Up @@ -108,6 +130,9 @@ object ValidationSpec {
val opt4 = ValidationExpr.fromMap(src, path)
if (opt4.nonEmpty)
return opt4.get
val opt5 = ValidationRegex.fromMap(src, path)
if (opt5.nonEmpty)
return opt5.get

// No validation templates matched, check for any bogus keys
ParseUtils.ensureLegalKeys(src, LEGAL_KEYS, path)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -992,6 +992,7 @@ class CppCompiler(
case _: ValidationLessThanError => "validation_less_than_error"
case _: ValidationGreaterThanError => "validation_greater_than_error"
case _: ValidationNotAnyOfError => "validation_not_any_of_error"
case _: ValidationRegexMatchError => "validation_regex_match_error"
}
s"kaitai::$cppErrName<$cppType>"
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ class RustCompiler(typeProvider: ClassTypeProvider, config: RuntimeConfig)
importList.add("std::default::Default")
importList.add("kaitai_struct::KaitaiStream")
importList.add("kaitai_struct::KaitaiStruct")
importList.add("regex::Regex")

out.puts
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,19 @@ trait ValidateOps extends ExceptionNames {
case ValidationRange(min, max) =>
attrValidateExprCompare(attrId, attr, Ast.cmpop.GtE, min, ValidationLessThanError(attr.dataTypeComposite))
attrValidateExprCompare(attrId, attr, Ast.cmpop.LtE, max, ValidationGreaterThanError(attr.dataTypeComposite))
case ValidationRegex(regex) =>
val regexMatch = Ast.expr.RegexMatch(Ast.expr.Name(attrId.toAstIdentifier), regex)
attrValidateExpr(attrId, attr.dataTypeComposite,
checkExpr = regexMatch,
errName = ksErrorName(ValidationRegexMatchError(attr.dataTypeComposite)),
errArgs = List(
Ast.expr.Str(regex),
Ast.expr.Name(attrId.toAstIdentifier),
Ast.expr.Name(IoIdentifier.toAstIdentifier),
Ast.expr.Str(attr.path.mkString("/", "/", ""))
)
)

case ValidationAnyOf(values) =>
val bigOrExpr = Ast.expr.BoolOp(
Ast.boolop.Or,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,14 @@ abstract class BaseTranslator(val provider: TypeProvider)
case (ltype, rtype) =>
throw new TypeMismatchError(s"can't compare $ltype and $rtype")
}
case Ast.expr.RegexMatch(str: Ast.expr, regex: String) => {
detectType(str) match {
case (_: StrType) =>
doRegexMatchOp(translate(str), doRegex(regex))
case _ =>
throw new TypeMismatchError(s"regex match need strings")
}
}
case Ast.expr.BinOp(left: Ast.expr, op: Ast.operator, right: Ast.expr) =>
(detectType(left), detectType(right), op) match {
case (_: NumericType, _: NumericType, _) =>
Expand Down Expand Up @@ -179,6 +187,7 @@ abstract class BaseTranslator(val provider: TypeProvider)

def doEnumByLabel(enumTypeAbs: List[String], label: String): String
def doEnumById(enumTypeAbs: List[String], id: String): String
/**
 * Renders a regular expression pattern in the form that is handed to
 * `doRegexMatchOp`. This default returns the pattern unchanged.
 *
 * @param reg regular expression pattern
 * @return text to embed in the generated match expression
 */
def doRegex(reg: String): String = reg

// Predefined methods of various types
def strConcat(left: Ast.expr, right: Ast.expr): String = s"${translate(left)} + ${translate(right)}"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,13 @@ class CSharpTranslator(provider: TypeProvider, importList: ImportList) extends B
}
}

/**
 * Renders a C# expression that builds a `Regex` from the pattern and calls
 * `IsMatch` on the string; also registers the namespace import it needs.
 *
 * NOTE(review): `Regex.IsMatch` succeeds on a match anywhere in the input —
 * confirm this is the intended semantics across target languages.
 *
 * @param str   translated expression for the string to test
 * @param regex pattern as rendered by `doRegex`
 */
override def doRegexMatchOp(str: String, regex: String): String = {
  importList.add("System.Text.RegularExpressions")
  s"new Regex($regex).IsMatch($str)"
}

/** Renders the pattern as a string literal via `doStringLiteral`. */
override def doRegex(reg: String): String =
  doStringLiteral(reg)

override def doBytesCompareOp(left: Ast.expr, op: Ast.cmpop, right: Ast.expr): String =
s"(${CSharpCompiler.kstreamName}.ByteArrayCompare(${translate(left)}, ${translate(right)}) ${cmpOp(op)} 0)"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,4 +64,7 @@ trait CommonOps extends AbstractTranslator {
case Ast.unaryop.Minus => "-"
case Ast.unaryop.Not => "!"
}

/**
 * Renders a target-language expression that checks a string against a
 * regular expression.
 *
 * @param str   already-translated expression for the string to test
 * @param regex pattern text as rendered for the target language
 * @return source text of the match expression
 */
def doRegexMatchOp(str: String, regex: String): String

}
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,13 @@ class CppTranslator(provider: TypeProvider, importListSrc: CppImportList, import
}
}

/**
 * Renders a C++ `std::regex_match` call over the string and a `std::regex`
 * built from the pattern; also registers the `<regex>` system header.
 *
 * @param str   translated expression for the string to test
 * @param regex pattern as rendered by `doRegex`
 */
override def doRegexMatchOp(str: String, regex: String): String = {
  importListSrc.addSystem("regex")
  s"std::regex_match($str, std::regex($regex))"
}

/** Renders the pattern as a string literal via `doStringLiteral`. */
override def doRegex(reg: String): String =
  doStringLiteral(reg)

override def arraySubscript(container: expr, idx: expr): String =
s"${translate(container)}->at(${translate(idx)})"
override def doIfExp(condition: expr, ifTrue: expr, ifFalse: expr): String =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ class ExpressionValidator(val provider: TypeProvider)
case Ast.expr.Compare(left: Ast.expr, op: Ast.cmpop, right: Ast.expr) =>
validate(left)
validate(right)
case Ast.expr.RegexMatch(str: Ast.expr, regex: String) =>
validate(str)
case Ast.expr.BinOp(left: Ast.expr, op: Ast.operator, right: Ast.expr) =>
validate(left)
validate(right)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,11 @@ class GoTranslator(out: StringLanguageOutputWriter, provider: TypeProvider, impo
}
}

override def doRegexMatchOp(str: String, regex: String): String = {
importList.add("regexp")
s"matched, _ := regexp.MatchString(`${regex}`, ${trStringLiteral(str)}); matched"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will this fail if it will be used twice in the same function?

Copy link
Member

@generalmimon generalmimon Apr 3, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And I think it's not wise to ignore the error in case it occurs (doing matched, _ :=). Please follow the same approach used in the rest of the GoTranslator; notice that the methods you added are among the few in the file returning String and not TranslatorResult. There is a special method, outVarCheckRes, for handling the errors returned from functions; I suggest using that one.

Copy link
Author

@jocelynke jocelynke Apr 4, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried to modify the translateExpr func in GoTranslator, but it seems to me that this function is not executed (I use some logs), could you explain it to me ?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will this fail if it will be used twice in the same function?

Yes, it probably will, because the variables can't be redeclared in the same scope in Go unless some new variables are introduced. I've made a simple test: https://repl.it/repls/WonderfulJointProcedure

I tried to modify the translateExpr func in GoTranslator, but it seems to me that this function is not executed

@jocelynke The Go language doesn't play well with the classic BaseTranslator model that returns Strings, because of the error handling in Go. Go doesn't have try/catch as other languages do; it treats errors as values. This means that if you want the error from a function you call to bubble up in the call stack, it won't happen automatically. You have to check if the error value returned from the function is nil, and if it's not, then an error occurred and you should return it to the caller. (Just returning it isn't exactly debug-friendly, because the caller loses any context about which function in the stack actually caused the error, but it's unfortunately done this way in KS currently.)

This means that parsing a single u1 field from the stream can't be done in a single expression (because it can of course result in an EOF error), but instead it is a 3-phase process:

func (this *HelloWorld) Read(...) (err error) {
 // ...
    tmp1, err := this._io.ReadU1()
    if err != nil {
        return err
    }
    this.One = tmp1

Because of this, GoTranslator can't just extend BaseTranslator as other languages do, because the methods in BaseTranslator have return type String which doesn't fit for Go (and it can't be changed in the derived classes). Not all methods in GoTranslator can be implemented in a single expression which can be passed as String, for these the return value of the called error-returning method is saved to a temporary variable (named tmp1, tmp2, ... to avoid name conflicts), the error is handled and only the "serial number" of the temp variable is returned. This is done by the method outVarCheckRes.

Hence most methods in GoTranslator return TranslatorResult, which is defined on the top:

sealed trait TranslatorResult
case class ResultString(s: String) extends TranslatorResult
case class ResultLocalVar(n: Int) extends TranslatorResult

And now how you should do it. I think the doRegexMatchOp here could look like this:

override def doRegexMatchOp(str: String, regex: String): TranslatorResult = {
  importList.add("regexp")
  outVarCheckRes(s"regexp.MatchString(`${regex}`, ${trStringLiteral(str)})")

Then you'll have to move the abstract definition of this method from CommonOps to BaseTranslator:

def doRegexMatchOp(str: String, regex: String): String

The reason is that CommonOps is inherited by GoTranslator too (whereas BaseTranslator is not), but in GoTranslator you need to specify a different return type, which won't work in Scala. And the Ast.expr.RegexMatch handling in BaseTranslator

case Ast.expr.RegexMatch(str: Ast.expr, regex: String) => {
detectType(str) match {
case (_: StrType) =>
doRegexMatchOp(translate(str), doRegex(regex))
case _ =>
throw new TypeMismatchError(s"regex match need strings")
}
}

needs to be in GoTranslator as well, because again, it doesn't extend BaseTranslator.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for this deep explanation, I had implemented what you told me, but the thing is I have no log of a call to translateExpr in the GoTranslator. The consequence is that no code is generated to validate the field that needs it.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@generalmimon I found that the valid key is not supported by Go in kaitai, so it's normal that no code is generated...

}

override def doCast(value: Ast.expr, typeName: DataType): TranslatorResult = ???

override def doArrayLiteral(t: DataType, value: Seq[Ast.expr]) =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,12 @@ class JavaScriptTranslator(provider: TypeProvider) extends BaseTranslator(provid
override def enumToInt(v: expr, et: EnumType): String =
translate(v)

/**
 * Renders a JavaScript expression that builds a `RegExp` from the pattern
 * and calls `test` on the string.
 *
 * @param str   translated expression for the string to test
 * @param regex pattern as rendered by `doRegex`
 */
override def doRegexMatchOp(str: String, regex: String): String =
  s"RegExp($regex).test($str)"

/** Renders the pattern as a string literal via `doStringLiteral`. */
override def doRegex(reg: String): String =
  doStringLiteral(reg)

/**
* Converts a boolean (true or false) to integer (1 or 0, respectively) in
* JavaScript. There are quite a few methods to so, this one is generally
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,13 @@ class JavaTranslator(provider: TypeProvider, importList: ImportList) extends Bas
}
}

/**
 * Renders a Java `Pattern.matches` call (pattern first, then the string);
 * also registers the `java.util.regex.Pattern` import.
 *
 * @param str   translated expression for the string to test
 * @param regex pattern as rendered by `doRegex`
 */
override def doRegexMatchOp(str: String, regex: String): String = {
  importList.add("java.util.regex.Pattern")
  s"Pattern.matches($regex, $str)"
}

/** Renders the pattern as a string literal via `doStringLiteral`. */
override def doRegex(reg: String): String =
  doStringLiteral(reg)

override def arraySubscript(container: expr, idx: expr): String =
s"${translate(container)}.get((int) ${translate(idx)})"
override def doIfExp(condition: expr, ifTrue: expr, ifFalse: expr): String =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,8 @@ class LuaTranslator(provider: TypeProvider, importList: ImportList) extends Base
case _ => super.unaryOp(op)
}

/**
 * Regex matching is not implemented for Lua yet.
 *
 * Fails loudly instead of returning an empty expression, which would end up
 * silently embedded in the generated code.
 *
 * @param str   translated expression for the string to test
 * @param regex pattern as rendered by `doRegex`
 * @throws NotImplementedError always
 */
override def doRegexMatchOp(str: String, regex: String): String =
  throw new NotImplementedError("regex match is not implemented for Lua") // TODO

/**
* Converts byte array (Seq[Byte]) into decimal-escaped Lua-style literal
* characters (i.e. like \255).
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ class NimTranslator(provider: TypeProvider, importList: ImportList) extends Base

override def strConcat(left: Ast.expr, right: Ast.expr): String = s"${translate(left)} & ${translate(right)}"

/**
 * Regex matching is not implemented for Nim yet.
 *
 * Fails loudly instead of returning an empty expression, which would end up
 * silently embedded in the generated code.
 *
 * @param str   translated expression for the string to test
 * @param regex pattern as rendered by `doRegex`
 * @throws NotImplementedError always
 */
override def doRegexMatchOp(str: String, regex: String): String =
  throw new NotImplementedError("regex match is not implemented for Nim") // TODO

// Members declared in io.kaitai.struct.translators.CommonMethods

override def unaryOp(op: Ast.unaryop): String = op match {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,12 @@ class PHPTranslator(provider: TypeProvider, config: RuntimeConfig) extends BaseT

override def doName(s: String) = s"${Utils.lowerCamelCase(s)}()"

/**
 * Renders a PHP `preg_match` call with the pattern first and the string second.
 *
 * @param str   translated expression for the string to test
 * @param regex complete PHP string literal holding the delimited pattern,
 *              as rendered by `doRegex`
 */
override def doRegexMatchOp(str: String, regex: String): String =
  s"preg_match($regex, $str)"

/**
 * Wraps the pattern in `/` delimiters and renders it as a string literal.
 *
 * Going through `doStringLiteral` (rather than splicing the raw pattern
 * between double quotes) keeps quotes, backslashes and other characters that
 * are special inside a PHP string from corrupting the generated code.
 *
 * NOTE(review): a `/` inside the pattern is still not escaped against the
 * delimiter — confirm whether that needs handling.
 */
override def doRegex(reg: String): String =
  doStringLiteral(s"/$reg/")

override def doEnumByLabel(enumTypeAbs: List[String], label: String): String = {
val enumClass = types2classAbs(enumTypeAbs)
s"$enumClass::${Utils.upperUnderscoreCase(label)}"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,10 @@ class PerlTranslator(provider: TypeProvider, importList: ImportList) extends Bas
}
}

/**
 * Renders a Perl pattern-match expression using the `=~` binding operator.
 *
 * The result is parenthesised so that a surrounding unary `!` applies to the
 * whole match rather than only to the left operand.
 *
 * @param str   translated expression for the string to test
 * @param regex pattern text, placed between `m/` and `/`
 */
override def doRegexMatchOp(str: String, regex: String): String =
  s"($str =~ m/$regex/)"

override def doEnumByLabel(enumType: List[String], label: String): String = {
val enumClass = PerlCompiler.types2class(enumType.init)
val enumClassWithScope = if (enumClass.isEmpty) "" else s"$enumClass::"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,11 @@ class PythonTranslator(provider: TypeProvider, importList: ImportList) extends B
override def doIfExp(condition: Ast.expr, ifTrue: Ast.expr, ifFalse: Ast.expr): String =
s"(${translate(ifTrue)} if ${translate(condition)} else ${translate(ifFalse)})"

/**
 * Renders a Python `re.match` call with the pattern first and the string
 * second; also registers the `re` import.
 *
 * @param str   translated expression for the string to test
 * @param regex complete Python string literal for the pattern, as rendered
 *              by `doRegex`
 */
override def doRegexMatchOp(str: String, regex: String): String = {
  importList.add("import re")
  s"re.match($regex, $str)"
}

/**
 * Renders the pattern as a string literal via `doStringLiteral`.
 *
 * Splicing the raw pattern between single quotes breaks on patterns that
 * contain a quote, and lets Python reinterpret backslash sequences in the
 * non-raw literal.
 */
override def doRegex(reg: String): String =
  doStringLiteral(reg)

// Predefined methods of various types
override def strToInt(s: Ast.expr, base: Ast.expr): String = {
val baseStr = translate(base)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,12 @@ class RubyTranslator(provider: TypeProvider) extends BaseTranslator(provider)
}
}

/**
 * Renders a Ruby `match` call on the string with the pattern as argument.
 *
 * The argument is parenthesised: a paren-less `x.match /re/` is an ambiguous
 * command call that would also absorb any operator following it in a larger
 * expression.
 *
 * @param str   translated expression for the string to test
 * @param regex regexp literal as rendered by `doRegex`
 */
override def doRegexMatchOp(str: String, regex: String): String =
  s"$str.match($regex)"

/** Wraps the pattern in `/` delimiters to form a Ruby regexp literal. */
override def doRegex(regex: String): String =
  s"/$regex/"

override def doEnumByLabel(enumTypeAbs: List[String], label: String): String =
s":${enumTypeAbs.last}_$label"
override def doEnumById(enumType: List[String], id: String): String =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,10 @@ class RustTranslator(provider: TypeProvider, config: RuntimeConfig) extends Base

override def doName(s: String) = s

/**
 * Renders a Rust expression that compiles the pattern with `Regex::new`,
 * unwraps the result and calls `is_match` on the string.
 *
 * No trailing `;` is emitted: the result is an expression meant to be
 * embedded in a larger one, and a statement terminator would break it.
 *
 * @param str   translated expression for the string to test
 * @param regex pattern text, placed inside a raw string literal `r"..."`
 */
override def doRegexMatchOp(str: String, regex: String): String =
  s"""Regex::new(r"$regex").unwrap().is_match($str)"""

override def doEnumByLabel(enumTypeAbs: List[String], label: String): String = {
val enumClass = types2classAbs(enumTypeAbs)
s"$enumClass::${Utils.upperUnderscoreCase(label)}"
Expand Down