flink1.18 广播流 The Broadcast State Pattern 官方案例scala版本

发布时间:2024年01月22日

对应官网

https://nightlies.apache.org/flink/flink-docs-master/docs/dev/datastream/fault-tolerance/broadcast_state/

测试数据

 * 广播流 官方案例 scala版本
 * 广播状态
 *    https://nightlies.apache.org/flink/flink-docs-master/docs/dev/datastream/fault-tolerance/broadcast_state/
 * 事件流:
 *    red,4side
 *    red,5side
 *    red,1side
 *    red,4side
 *
 * 规则流:
 *    rule1,4side,1side
 * 广播规则流,使用mapstate存储每种规则和对应的事件流数据 eg: {rule1 -> [4side,4side]} 遇到1side到来,则全部输出.
 * map可存储多个规则

完整scala版本代码

package com.yy.state.operatorStateDemo

import org.apache.flink.api.common.state.MapStateDescriptor
import org.apache.flink.api.common.typeinfo.{BasicTypeInfo, TypeHint, TypeInformation}
import org.apache.flink.api.java.functions.KeySelector
import org.apache.flink.api.java.typeutils.ListTypeInfo
import org.apache.flink.streaming.api.datastream.KeyedStream
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment
import org.apache.flink.streaming.api.functions.co.KeyedBroadcastProcessFunction
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment
import org.apache.flink.util.Collector

import java.time.ZoneId
import scala.collection.JavaConverters.iterableAsScalaIterableConverter
import scala.collection.mutable.ListBuffer
import scala.collection.JavaConverters._

/**
 * 广播流 官方案例 scala版本
 * 广播状态
 *    https://nightlies.apache.org/flink/flink-docs-master/docs/dev/datastream/fault-tolerance/broadcast_state/
 * 事件流:
 *    red,4side
 *    red,5side
 *    red,4side
 *    red,1side
 *    red,4side
 *
 * 规则流:
 *    rule1,4side,1side
 * 广播规则流,使用mapstate存储每种规则和对应的事件流数据 eg: {rule1 -> [4side,4side]} 遇到1side到来,则全部输出.
 * map可存储多个规则
 */
object BroadcastStateV1 {

  case class Item(color:Color,shape: Shape){
    def getShape()={
      shape
    }

  }
  case class Rule(name:String,first:Shape,second:Shape)
  case class Color(color:String)
  case class Shape(shape:String)

  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    val tEnv = StreamTableEnvironment.create(env)


    // 指定国内时区
    tEnv.getConfig.setLocalTimeZone(ZoneId.of("Asia/Shanghai"))


    val itemStream = env.socketTextStream("localhost", 9999)
      .map(_.split(","))
      .map(arr => Item(Color(arr(0)), Shape(arr(1))))

    val ruleStream = env.socketTextStream("localhost", 9998).broadcast()
      .map(s => Rule(s.split(",")(0), Shape(s.split(",")(1)), Shape(s.split(",")(2))))


    val ruleStateDescriptor = new MapStateDescriptor(
      "RulesBroadcastState",
      BasicTypeInfo.STRING_TYPE_INFO,
      TypeInformation.of(new TypeHint[Rule](){}));
    val ruleBroadcastStream = ruleStream.broadcast(ruleStateDescriptor)


    val colorPartitionedStream: KeyedStream[Item, Color] = itemStream
      .keyBy(new KeySelector[Item, Color] {
        override def getKey(value: Item): Color = value.color
      })


    colorPartitionedStream
      .connect(ruleBroadcastStream)
      .process(

        // type arguments in our KeyedBroadcastProcessFunction represent:
        //   1. the key of the keyed stream
        //   2. the type of elements in the non-broadcast side
        //   3. the type of elements in the broadcast side
        //   4. the type of the result, here a string

        new KeyedBroadcastProcessFunction[Color, Item, Rule, String]() {
          val  mapStateDesc =
            new MapStateDescriptor(
              "items",
              BasicTypeInfo.STRING_TYPE_INFO,
              new ListTypeInfo(classOf[Item]))

          val ruleStateDescriptor =
            new MapStateDescriptor(
              "RulesBroadcastState",
              BasicTypeInfo.STRING_TYPE_INFO,
              TypeInformation.of(new TypeHint[Rule]() {}))

          override def processElement(value: Item, ctx: KeyedBroadcastProcessFunction[Color, Item, Rule, String]#ReadOnlyContext, out: Collector[String]): Unit = {
            val state = getRuntimeContext().getMapState(mapStateDesc)
            val shape = value.getShape()
            // 遍历广播的 rule
            ctx.getBroadcastState(ruleStateDescriptor).immutableEntries().asScala.foreach{
              entry =>
                val ruleName = entry.getKey()
                val rule = entry.getValue()
                val stored: ListBuffer[Item] = {
                  if (state.contains(ruleName)) {
                    state.get(ruleName).asScala.to[ListBuffer]
                  } else {
                    new ListBuffer[Item]()
                  }
                }

                //
                if (shape == rule.second && stored.nonEmpty) {
                  stored.foreach { i =>
                    out.collect("MATCH: " + i + " - " + value);
                  }
                  stored.clear();
                }

                // there is no else{} to cover if rule.first == rule.second
                if (shape.equals(rule.first)) {
                  stored.append(value);
                }

                if (stored.isEmpty) {
                  // 规则已经匹配输出 清理状态
                  state.remove(ruleName)
                } else {
                  // 没输出则更新状态
                  state.put(ruleName, stored.asJava)
                }


            }
          }

          override def processBroadcastElement(value: Rule, ctx: KeyedBroadcastProcessFunction[Color, Item, Rule, String]#Context, out: Collector[String]): Unit = {
            ctx.getBroadcastState(ruleStateDescriptor).put(value.name, value);
          }
        }
      ).print("sink --> ")




    env.execute("flink-broadcast-state")
  }
}



文章来源:https://blog.csdn.net/qq_35515661/article/details/135749730
本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。