Spark Streaming实现offset在Zookeeper的读取

1. KafkaCluster的创建

/**
 * Builds a {@link KafkaCluster} from Java-side Kafka parameters.
 *
 * <p>KafkaCluster's constructor requires a {@code scala.collection.immutable.Map},
 * so the Java map is converted in two steps:
 * java.util.Map -> scala mutable.Map -> scala immutable.Map.
 *
 * @param kafkaParams Kafka consumer configuration (e.g. bootstrap servers, group id)
 * @return a KafkaCluster wrapping the given parameters
 */
public static KafkaCluster getKafkaCluster(Map<String, String> kafkaParams) {
    // Step 1: Java HashMap -> Scala mutable.Map.
    scala.collection.mutable.Map<String, String> mutableParams =
            JavaConversions.mapAsScalaMap(kafkaParams);

    // Step 2: mutable.Map -> immutable.Map. toMap needs evidence that the
    // element type conforms to a Tuple2; an identity <:< instance provides it.
    scala.collection.immutable.Map<String, String> scalaKafkaParams =
            mutableParams.toMap(
                    new Predef.$less$colon$less<Tuple2<String, String>, Tuple2<String, String>>() {
                        @Override
                        public Tuple2<String, String> apply(Tuple2<String, String> v1) {
                            return v1;
                        }
                    });

    return new KafkaCluster(scalaKafkaParams);
}

2. 从Zookeeper读取offset

public static Map<TopicAndPartition, Long> getConsumerOffsets(
KafkaCluster kafkaCluster,  
String groupId, 
Set<String> topicSet) {
  // 将Java的Set结构转换为Scala的mutable.Set结构
scala.collection.mutable.Set<String> mutableTopics = JavaConversions.asScalaSet (topicSet);
// 将Scala的mutable.Set结构转换为immutable.Set结构
scala.collection.immutable.Set<String> immutableTopics = mutableTopics.toSet();
// 根据传入的分区,获取TopicAndPartition形式的返回数据
scala.collection.immutable.Set<TopicAndPartition> topicAndPartitionSet2 = (scala. collection.immutable.Set<TopicAndPartition>)kafkaCluster.getPartitions(immutableTopics).right().get();

    // 创建用于存储offset数据的Hashmap
    Map<TopicAndPartition, Long> consumerOffsetsLong = new HashMap();

    // kafkaCluster.getConsumerOffsets:通过kafkaCluster的getConsumerOffsets方法获取指定消费者组合,指定主题分区的offset
// 如果返回Left,代表获取失败,Zookeeper中不存在对应的offset,因此HashMap中对应的offset应该设置为0
if (kafkaCluster.getConsumerOffsets(groupId, topicAndPartitionSet2).isLeft()) {
	   // 将Scala的Set结构转换为Java的Set结构
        Set<TopicAndPartition> topicAndPartitionSet1 = JavaConversions.setAsJavaSet(topic AndPartitionSet2);

// 由于没有保存offset(该group首次消费时), 各个partition offset 默认为0
        for (TopicAndPartition topicAndPartition : topicAndPartitionSet1) {
            consumerOffsetsLong.put(topicAndPartition, 0L);
        }
} else { 
// offset已存在, 获取Zookeeper上的offset
// 获取到的结构为Scala的Map结构
        scala.collection.immutable.Map<TopicAndPartition, Object> consumerOffsetsTemp =
          (scala.collection.immutable.Map<TopicAndPartition, Object>) kafkaCluster. getConsumerOffsets(groupId, topicAndPartitionSet2).right().get();

// 将Scala的Map结构转换为Java的Map结构
        Map<TopicAndPartition, Object> consumerOffsets = JavaConversions.mapAsJavaMap (consumerOffsetsTemp);

// 将Scala的Set结构转换为Java的Set结构
        Set<TopicAndPartition> topicAndPartitionSet1 = JavaConversions.setAsJavaSet (topicAndPartitionSet2);

        // 将offset加入到consumerOffsetsLong的对应项
        for (TopicAndPartition topicAndPartition : topicAndPartitionSet1) {
            Long offset = (Long) consumerOffsets.get(topicAndPartition);
            consumerOffsetsLong.put(topicAndPartition, offset);
        }
    }

    return consumerOffsetsLong;
}

3. Offset写入Zookeeper

public static void offsetToZk(final KafkaCluster kafkaCluster,
						     final AtomicReference<OffsetRange[]> offsetRanges,
							final String groupId) {
  // 遍历每一个偏移量信息
for (OffsetRange o : offsetRanges.get()) {

// 提取offsetRange中的topic和partition信息封装成TopicAndPartition
TopicAndPartition topicAndPartition = new TopicAndPartition(o.topic(), o.partition());
// 创建Map结构保持TopicAndPartition和对应的offset数据
Map<TopicAndPartition, Object> topicAndPartitionObjectMap = new HashMap();
// 将当前offsetRange的topicAndPartition信息和untilOffset信息写入Map
topicAndPartitionObjectMap.put(topicAndPartition, o.untilOffset());

	// 将Java的Map结构转换为Scala的mutable.Map结构
	scala.collection.mutable.Map<TopicAndPartition, Object> testMap = JavaConversions. mapAsScalaMap(topicAndPartitionObjectMap);

	// 将Scala的mutable.Map转化为imutable.Map
	scala.collection.immutable.Map<TopicAndPartition, Object> scalatopicAndPartition ObjectMap =
	testMap.toMap(new Predef.$less$colon$less<Tuple2<TopicAndPartition, Object>, Tuple2 <TopicAndPartition, Object>>() {
	public Tuple2<TopicAndPartition, Object> apply(Tuple2<TopicAndPartition, Object> v1) 
{return v1;}
	});

	// 更新offset到kafkaCluster
	kafkaCluster.setConsumerOffsets(groupId, scalatopicAndPartitionObjectMap);
}
}

// Shown for reference: Spark's OffsetRange describes one Kafka partition's
// consumed range in a batch. The constructor is private — instances come from
// the streaming API, not from user code. Presumably fromOffset is inclusive
// and untilOffset exclusive, per Spark's documentation — confirm against the
// spark-streaming-kafka API docs.
final class OffsetRange private(
    val topic: String,
    val partition: Int,
    val fromOffset: Long,
    val untilOffset: Long) 

版权声明:本博客为记录本人自学感悟,转载需注明出处!
https://me.csdn.net/qq_39657909

已标记关键词 清除标记
©️2020 CSDN 皮肤主题: 技术黑板 设计师:CSDN官方博客 返回首页