Adding OpenVINO support to multiple annotators
ahmedlone127 committed Nov 6, 2024
1 parent 6117879 commit 9034365
Showing 56 changed files with 24,478 additions and 442 deletions.
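The diffs below apply the same pattern to each annotator: an optional OpenVINO wrapper field is added next to the existing TensorFlow and ONNX wrappers, and the engine is resolved from whichever wrapper is defined. A minimal, self-contained sketch of that selection logic follows; the object name, the empty wrapper case classes, and the engine-name strings are stand-ins for illustration, not the real Spark NLP classes, which return the names from com.johnsnowlabs.ml.util (TensorFlow.name, ONNX.name, Openvino.name).

```scala
// Sketch of the engine-selection pattern this commit applies to each annotator.
// Wrapper types and engine-name strings are stand-ins, not Spark NLP classes.
object EngineSelectionSketch {
  final case class TensorflowWrapper()
  final case class OnnxWrapper()
  final case class OpenvinoWrapper()

  def detectEngine(
      tensorflowWrapper: Option[TensorflowWrapper],
      onnxWrapper: Option[OnnxWrapper],
      openvinoWrapper: Option[OpenvinoWrapper]): String =
    if (tensorflowWrapper.isDefined) "tensorflow"
    else if (onnxWrapper.isDefined) "onnx"
    else if (openvinoWrapper.isDefined) "openvino"
    else "tensorflow" // most annotators fall back to TensorFlow when nothing is defined

  def main(args: Array[String]): Unit = {
    // Only an OpenVINO wrapper is present, so the OpenVINO path is selected.
    println(detectEngine(None, None, Some(OpenvinoWrapper())))
  }
}
```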

148 changes: 141 additions & 7 deletions src/main/scala/com/johnsnowlabs/ml/ai/Bart.scala
@@ -21,6 +21,7 @@ import com.johnsnowlabs.ml.ai.util.Generation.Generate
import com.johnsnowlabs.ml.onnx.{OnnxSession, OnnxWrapper}
import com.johnsnowlabs.ml.onnx.OnnxWrapper.EncoderDecoderWithoutPastWrappers
import com.johnsnowlabs.ml.onnx.TensorResources.implicits.OnnxSessionResult
import com.johnsnowlabs.ml.openvino.OpenvinoWrapper.{EncoderDecoderWithoutPastWrappers => OpenvinoEncoderDecoderWithoutPastWrappers}
import com.johnsnowlabs.ml.tensorflow.sign.{ModelSignatureConstants, ModelSignatureManager}
import com.johnsnowlabs.ml.tensorflow.{TensorResources, TensorflowWrapper}
import com.johnsnowlabs.ml.util.{ONNX, Openvino, TensorFlow}
@@ -44,6 +45,7 @@ import scala.collection.JavaConverters._
private[johnsnowlabs] class Bart(
val tensorflowWrapper: Option[TensorflowWrapper],
val onnxWrapper: Option[EncoderDecoderWithoutPastWrappers],
val openvinoWrapper: Option[OpenvinoEncoderDecoderWithoutPastWrappers],
configProtoBytes: Option[Array[Byte]] = None,
signatures: Option[Map[String, String]] = None,
merges: Map[(String, String), Int],
@@ -61,12 +63,16 @@ private[johnsnowlabs] class Bart(
private val paddingTokenId = 1
private val eosTokenId = 2
private val vocabSize = 50264
private var decoderEncoderStateTensorsOV: Option[org.intel.openvino.Tensor] = None
private var encoderAttentionMaskOV: Option[org.intel.openvino.Tensor] = None

var tensorDecoder = new TensorResources()
private var nextStateTensor1: Option[org.tensorflow.Tensor] = None
private var nextStateTensor2: Option[org.tensorflow.Tensor] = None
val detectedEngine: String =
if (tensorflowWrapper.isDefined) TensorFlow.name
else if (onnxWrapper.isDefined) ONNX.name
else if (openvinoWrapper.isDefined) Openvino.name
else TensorFlow.name


@@ -83,6 +89,20 @@ private[johnsnowlabs] class Bart(
val decoderOutput: String = "logits"
}

private object OpenVinoSignatures {
val encoderInputIDs: String = "input_ids"
val encoderAttentionMask: String = "attention_mask"

val encoderOutput: String = "last_hidden_state"

val decoderInputIDs: String = "input_ids"
val decoderEncoderAttentionMask: String = "encoder_attention_mask"
val decoderEncoderState: String = "encoder_hidden_states"

val decoderOutput: String = "logits"
}


/** @param sentences
* Sequence of WordpieceTokenizedSentence
* @param batchSize
@@ -341,7 +361,7 @@ private[johnsnowlabs] class Bart(
}
modelOutputs
}
else {
else if (detectedEngine == ONNX.name) { {

var (encoderSession, encoderEnv): (OrtSession, OrtEnvironment) = (null, null)
var (decoderSession, decoderEnv): (OrtSession, OrtEnvironment) = (null, null)
@@ -415,6 +435,85 @@ private[johnsnowlabs] class Bart(
modelOutputs
}

}
else {

val encoderInferRequest =
openvinoWrapper.get.encoder.getCompiledModel().create_infer_request()
val decoderInferRequest =
openvinoWrapper.get.decoder.getCompiledModel().create_infer_request()


val encoderAttentionMask: org.intel.openvino.Tensor =
new org.intel.openvino.Tensor(
Array(expandedEncoderInputIdsVals.length, expandedEncoderInputIdsVals.head.length),
expandedEncoderInputIdsVals.toArray.map(_.map(_ => 1L)).flatten)

val encoderInputTensors =
new org.intel.openvino.Tensor(
Array(expandedEncoderInputIdsVals.length, expandedEncoderInputIdsVals.head.length),
expandedEncoderInputIdsVals.toArray.map(_.map(_.toLong)).flatten)


encoderInferRequest.set_tensor(OpenVinoSignatures.encoderInputIDs, encoderInputTensors)
encoderInferRequest.set_tensor(OpenVinoSignatures.encoderAttentionMask, encoderAttentionMask)
encoderInferRequest.infer()

val encoderStateBuffer =
try {
val encoderStateTensor = encoderInferRequest.get_tensor(OpenVinoSignatures.encoderOutput)

val shape = encoderStateTensor.get_shape().map(_.toLong)
encoderStateTensor.data()
.grouped(shape(2).toInt)
.toArray
.grouped(shape(1).toInt)
.toArray
} catch {
case e: Exception =>
e.printStackTrace()
// Rethrow the exception to propagate it further
throw e
}

val decoderEncoderStateTensors =
new org.intel.openvino.Tensor(
Array(encoderStateBuffer.length, encoderStateBuffer.head.length, encoderStateBuffer.head.head.length),
encoderStateBuffer.flatten.flatten)



decoderEncoderStateTensorsOV = Some(decoderEncoderStateTensors)
encoderAttentionMaskOV = Some(encoderAttentionMask)

val modelOutputs = generate(
batch,
null,
null,
decoderInputs,
maxOutputLength,
minOutputLength,
doSample,
beamSize,
1,
temperature,
topK,
topP,
repetitionPenalty,
noRepeatNgramSize,
this.vocabSize,
this.eosTokenId,
this.paddingTokenId,
randomSeed,
ignoreTokenIdsInt,
null,
ovInferRequest = Some(decoderInferRequest))


modelOutputs

}
}

/** Decode a sequence of sentences
@@ -474,7 +573,6 @@ private[johnsnowlabs] class Bart(
session: Either[Session, (OrtEnvironment, OrtSession)],
ovInferRequest: Option[InferRequest]): Array[Array[Float]] = {


if (detectedEngine == TensorFlow.name) {
// extract decoderEncoderStateTensors, encoderAttentionMaskTensors and Session from LEFT
assert(decoderEncoderStateTensors.isLeft)
@@ -535,10 +633,16 @@ private[johnsnowlabs] class Bart(
r
else
r
.fetch(_tfBartSignatures
.getOrElse(ModelSignatureConstants.InitCachedOutput1.key, "missing_cache1_out_init"))
.fetch(_tfBartSignatures
.getOrElse(ModelSignatureConstants.InitCachedOutPut2.key, "missing_cache2_out_init"))
.fetch(
_tfBartSignatures
.getOrElse(
ModelSignatureConstants.InitCachedOutput1.key,
"missing_cache1_out_init"))
.fetch(
_tfBartSignatures
.getOrElse(
ModelSignatureConstants.InitCachedOutPut2.key,
"missing_cache2_out_init"))
} else {
sess.runner
.feed(
@@ -603,7 +707,7 @@ private[johnsnowlabs] class Bart(
decoderInputTensors.close()
nextTokenLogits
}
else {
else if (detectedEngine == ONNX.name) {
val (env, decoderSession) = session.right.get

val decoderInputLength = decoderInputIds.head.length
@@ -649,6 +753,36 @@ private[johnsnowlabs] class Bart(
decoderOutputs.toArray

}
else {
val decoderInputLength = decoderInputIds.head.length
val sequenceLength = decoderInputLength
val batchSize = encoderInputIds.length

val decoderInputIdsLong: Array[Array[Long]] =
decoderInputIds.map(_.map(_.toLong)).toArray


val decoderInputIdsLongTensor =
new org.intel.openvino.Tensor(Array(decoderInputIdsLong.length, decoderInputIdsLong.head.length), decoderInputIdsLong.flatten)


ovInferRequest.get.set_tensor(OpenVinoSignatures.decoderInputIDs, decoderInputIdsLongTensor)
ovInferRequest.get.set_tensor(OpenVinoSignatures.decoderEncoderAttentionMask, encoderAttentionMaskOV.get)
ovInferRequest.get.set_tensor(OpenVinoSignatures.decoderEncoderState, decoderEncoderStateTensorsOV.get)

ovInferRequest.get.infer()

val logitsRaw = ovInferRequest.get.get_tensor(OpenVinoSignatures.decoderOutput).data()
val decoderOutputs = (0 until batchSize).map(i => {
logitsRaw
.slice(
i * sequenceLength * vocabSize + (sequenceLength - 1) * vocabSize,
i * sequenceLength * vocabSize + sequenceLength * vocabSize)
})
decoderOutputs.toArray

}
}


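For reference, the slicing at the end of the OpenVINO decoder branch above selects the logits of the last decoding step for each batch element from the flattened buffer returned by the infer request, assuming a row-major [batch, sequence, vocab] layout. A small stand-alone sketch of that index arithmetic; the object and parameter names are illustrative only.

```scala
// Sketch of last-step logits extraction from a flattened [batch, sequence, vocab] buffer.
object LastTokenLogitsSketch {
  def lastStepLogits(
      logitsRaw: Array[Float],
      batchSize: Int,
      sequenceLength: Int,
      vocabSize: Int): Array[Array[Float]] =
    (0 until batchSize).map { i =>
      val start = i * sequenceLength * vocabSize + (sequenceLength - 1) * vocabSize
      logitsRaw.slice(start, start + vocabSize)
    }.toArray

  def main(args: Array[String]): Unit = {
    // 2 sequences, 2 decoding steps, vocabulary of 3: keep only the final step per sequence.
    val raw = Array(0f, 1f, 2f, 3f, 4f, 5f, 6f, 7f, 8f, 9f, 10f, 11f)
    lastStepLogits(raw, batchSize = 2, sequenceLength = 2, vocabSize = 3)
      .foreach(row => println(row.mkString(", ")))
    // prints "3.0, 4.0, 5.0" and then "9.0, 10.0, 11.0"
  }
}
```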
31 changes: 29 additions & 2 deletions src/main/scala/com/johnsnowlabs/ml/ai/CLIP.scala
@@ -17,10 +17,11 @@
package com.johnsnowlabs.ml.ai

import ai.onnxruntime.OnnxTensor
import com.johnsnowlabs.ml.onnx.{OnnxWrapper, OnnxSession, TensorResources}
import com.johnsnowlabs.ml.onnx.{OnnxSession, OnnxWrapper, TensorResources}
import com.johnsnowlabs.ml.openvino.OpenvinoWrapper
import com.johnsnowlabs.ml.tensorflow.TensorflowWrapper
import com.johnsnowlabs.ml.util.LinAlg.{argmax, softmax}
import com.johnsnowlabs.ml.util.{ONNX, TensorFlow}
import com.johnsnowlabs.ml.util.{ONNX, Openvino, TensorFlow}
import com.johnsnowlabs.nlp._
import com.johnsnowlabs.nlp.annotators.common.Sentence
import com.johnsnowlabs.nlp.annotators.cv.feature_extractor.Preprocessor
@@ -33,13 +34,15 @@ import scala.jdk.CollectionConverters.mapAsJavaMapConverter
private[johnsnowlabs] class CLIP(
val tensorflowWrapper: Option[TensorflowWrapper],
val onnxWrapper: Option[OnnxWrapper],
val openvinoWrapper: Option[OpenvinoWrapper],
configProtoBytes: Option[Array[Byte]] = None,
tokenizer: CLIPTokenizer,
preprocessor: Preprocessor)
extends Serializable {

val detectedEngine: String =
if (tensorflowWrapper.isDefined) TensorFlow.name
else if (openvinoWrapper.isDefined) Openvino.name
else if (onnxWrapper.isDefined) ONNX.name
else throw new IllegalArgumentException("No model engine defined.")

@@ -94,6 +97,30 @@ private[johnsnowlabs] class CLIP(
val logits = rawLogits.grouped(batchSize).toArray.transpose

logits.map(scores => softmax(scores))

case Openvino.name =>
val tokenTensors =
new org.intel.openvino.Tensor(Array(labels.length, labels.head.length), labels.flatten)
val pixelValuesTensor = new org.intel.openvino.Tensor(Array(batchImages.length, batchImages.head.length, batchImages.head.head.length, batchImages.head.head.head.length),
batchImages.flatten.flatten.flatten)
val attentionMaskTensor =
new org.intel.openvino.Tensor(Array(labels.length, labels.head.length), Array.fill(labels.length, labels.head.length)(1L).flatten)

val inferRequest = openvinoWrapper.get.getCompiledModel().create_infer_request()
inferRequest.set_tensor("input_ids", tokenTensors)
inferRequest.set_tensor("pixel_values", pixelValuesTensor)
inferRequest.set_tensor("attention_mask", attentionMaskTensor)
inferRequest.infer()

val result = inferRequest.get_tensor("logits_per_text")
val rawLogits = result.data()

val batchSize = batchImages.length
val logits = rawLogits.grouped(batchSize).toArray.transpose

logits.map(scores => softmax(scores))


case _ => throw new Exception("Only TensorFlow, ONNX, and OpenVINO engines are currently supported.")
}
}
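The OpenVINO branch added to CLIP reuses the existing post-processing: the flat logits_per_text buffer is grouped by the number of images and transposed so that each row holds one image's scores across all labels, then softmaxed. A self-contained sketch of that reshaping follows; ClipLogitsSketch and its local softmax are illustrative stand-ins, not the Spark NLP LinAlg helpers.

```scala
// Sketch of the logits_per_text post-processing: group by image count, transpose to
// per-image rows, then softmax each row of label scores.
object ClipLogitsSketch {
  def softmax(scores: Array[Float]): Array[Float] = {
    val maxScore = scores.max
    val exps = scores.map(s => math.exp((s - maxScore).toDouble).toFloat)
    val sum = exps.sum
    exps.map(_ / sum)
  }

  def main(args: Array[String]): Unit = {
    val batchSize = 2 // number of images in the batch
    // 3 labels x 2 images, flattened row-major (one row of image scores per label)
    val rawLogits = Array(1.0f, 0.2f, 0.1f, 2.0f, 0.5f, 0.3f)
    val logits = rawLogits.grouped(batchSize).toArray.transpose
    logits.map(softmax).foreach(probs => println(probs.mkString(", ")))
  }
}
```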
src/main/scala/com/johnsnowlabs/ml/ai/ConvNextClassifier.scala
@@ -22,15 +22,17 @@ import com.johnsnowlabs.nlp.annotators.cv.feature_extractor.Preprocessor
import com.johnsnowlabs.nlp.annotators.cv.util.io.ImageIOUtils
import com.johnsnowlabs.nlp.annotators.cv.util.transform.ImageResizeUtils
import com.johnsnowlabs.ml.onnx.OnnxWrapper
import com.johnsnowlabs.ml.openvino.OpenvinoWrapper

private[johnsnowlabs] class ConvNextClassifier(
tensorflowWrapper: Option[TensorflowWrapper],
onnxWrapper: Option[OnnxWrapper],
openvinoWrapper: Option[OpenvinoWrapper],
configProtoBytes: Option[Array[Byte]] = None,
tags: Map[String, BigInt],
preprocessor: Preprocessor,
signatures: Option[Map[String, String]] = None)
extends ViTClassifier(tensorflowWrapper, onnxWrapper, configProtoBytes, tags, preprocessor, signatures) {
extends ViTClassifier(tensorflowWrapper, onnxWrapper, openvinoWrapper, configProtoBytes, tags, preprocessor, signatures) {

override def encode(
annotations: Array[AnnotationImage],
34 changes: 33 additions & 1 deletion src/main/scala/com/johnsnowlabs/ml/ai/DistilBert.scala
@@ -19,9 +19,10 @@ package com.johnsnowlabs.ml.ai
import ai.onnxruntime.OnnxTensor
import com.johnsnowlabs.ml.ai.util.PrepareEmbeddings
import com.johnsnowlabs.ml.onnx.{OnnxSession, OnnxWrapper}
import com.johnsnowlabs.ml.openvino.OpenvinoWrapper
import com.johnsnowlabs.ml.tensorflow.sign.{ModelSignatureConstants, ModelSignatureManager}
import com.johnsnowlabs.ml.tensorflow.{TensorResources, TensorflowWrapper}
import com.johnsnowlabs.ml.util.{ModelArch, ONNX, TensorFlow}
import com.johnsnowlabs.ml.util.{ModelArch, ONNX, Openvino, TensorFlow}
import com.johnsnowlabs.nlp.annotators.common._
import com.johnsnowlabs.nlp.{Annotation, AnnotatorType}
import org.slf4j.{Logger, LoggerFactory}
@@ -71,6 +72,7 @@ import scala.collection.JavaConverters._
private[johnsnowlabs] class DistilBert(
val tensorflowWrapper: Option[TensorflowWrapper],
val onnxWrapper: Option[OnnxWrapper],
val openvinoWrapper: Option[OpenvinoWrapper],
sentenceStartTokenId: Int,
sentenceEndTokenId: Int,
configProtoBytes: Option[Array[Byte]] = None,
@@ -83,6 +85,7 @@ private[johnsnowlabs] class DistilBert(
val detectedEngine: String =
if (tensorflowWrapper.isDefined) TensorFlow.name
else if (onnxWrapper.isDefined) ONNX.name
else if (openvinoWrapper.isDefined) Openvino.name
else TensorFlow.name
private val onnxSessionOptions: Map[String, String] = new OnnxSession().getSessionOptions

@@ -142,6 +145,35 @@ private[johnsnowlabs] class DistilBert(
tokenTensors.close()
maskTensors.close()
}


case Openvino.name =>

val batchLength = batch.length
val shape = Array(batchLength, maxSentenceLength)
val (tokenTensors, maskTensors) =
PrepareEmbeddings.prepareOvLongBatchTensors(batch, maxSentenceLength, batchLength)

val inferRequest = openvinoWrapper.get.getCompiledModel().create_infer_request()
inferRequest.set_tensor("input_ids", tokenTensors)
inferRequest.set_tensor("attention_mask", maskTensors)

inferRequest.infer()

try {
inferRequest
.get_tensor("last_hidden_state")
.data()
} catch {
case e: Exception =>
e.printStackTrace()
// Rethrow the exception to propagate it further
throw e
}

case _ =>
val tensors = new TensorResources()

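The OpenVINO branches above all build org.intel.openvino.Tensor objects from a shape array plus a row-major flattened buffer (directly in the Bart and CLIP hunks, and via PrepareEmbeddings.prepareOvLongBatchTensors for DistilBert). A minimal sketch of that data preparation, without the OpenVINO runtime itself; OvBatchLayoutSketch and its helpers are hypothetical names, and real annotators may mask padding positions rather than using an all-ones mask.

```scala
// Sketch of the (shape, flat data) layout expected by the OpenVINO tensor constructors
// used in this commit; tensor creation itself is omitted.
object OvBatchLayoutSketch {
  // Flatten a padded 2-D token batch row-major and record its shape.
  def toShapeAndData(batch: Array[Array[Int]]): (Array[Int], Array[Long]) = {
    val shape = Array(batch.length, batch.head.length)
    val data = batch.flatMap(_.map(_.toLong))
    (shape, data)
  }

  // All-ones attention mask, matching the simple masks built in the Bart and CLIP hunks.
  def attentionMask(batch: Array[Array[Int]]): Array[Long] =
    batch.flatMap(_.map(_ => 1L))

  def main(args: Array[String]): Unit = {
    val batch = Array(Array(0, 31414, 2), Array(0, 9226, 2))
    val (shape, data) = toShapeAndData(batch)
    println(shape.mkString("[", ", ", "]") + " -> " + data.mkString(", "))
    println(attentionMask(batch).mkString(", "))
  }
}
```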