TOML

Big Data Technologies - TOML

Big data technologies are a collection of tools designed to efficiently process high-volume, high-variety, and high-velocity data that traditional database management systems cannot handle. Centered on distributed processing frameworks such as Hadoop, Spark, Kafka, and Flink, these technologies form a comprehensive ecosystem for data collection, storage, processing, analysis, and visualization. They serve as the foundation of the modern data-driven society, enabling real-time analytics, machine learning, IoT data processing, and business intelligence.

Big Data, Distributed Processing, Hadoop, Spark, Kafka, Flink, Data Engineering, Stream Processing, Batch Processing
# Record for Apache Hadoop. Each `[[items]]` header appends one table to the
# top-level `items` array.
[[items]]
# Kept as a quoted string even though the value is numeric.
code = "1"
slug = "apache-hadoop"
name = "Apache Hadoop"
description = "An open-source framework for distributed storage and batch processing."
category = "Distributed Storage & Batch Processing"
# Year held as a string, not a TOML integer or date.
initialRelease = "2006"
# Free-text magnitude range rather than a measured number.
latency = "Minutes to Hours"
license = "Apache License 2.0"
processingType = "Batch Processing"

# Record for Apache Spark, appended to the top-level `items` array.
[[items]]
# String-typed, like every other scalar in this record.
code = "2"
slug = "apache-spark"
name = "Apache Spark"
description = "A high-speed data processing engine using in-memory computation."
category = "General-Purpose Distributed Processing Engine"
# Year as a quoted string, not a native integer.
initialRelease = "2014"
# Qualitative label, not a number with units.
latency = "Seconds"
license = "Apache License 2.0"
# Free-text label with a parenthetical qualifier; this exact wording is not
# used by any other record, so treat it as display text rather than an enum.
processingType = "Batch & Stream Processing (Micro-batch)"

# Record for Apache Kafka, appended to the top-level `items` array.
[[items]]
# Numeric value stored as a string.
code = "3"
slug = "apache-kafka"
name = "Apache Kafka"
description = "A high-throughput distributed streaming platform."
category = "Messaging & Streaming Platform"
# Year stored as a string.
initialRelease = "2011"
# Descriptive magnitude only; no numeric bound is given.
latency = "Milliseconds"
license = "Apache License 2.0"
# Distinct from the plain "Stream Processing" label used elsewhere in this
# file; the "(Messaging)" suffix is part of the value.
processingType = "Stream Processing (Messaging)"

# Record for Apache Flink, appended to the top-level `items` array.
[[items]]
# Numeric value stored as a string.
code = "4"
slug = "apache-flink"
name = "Apache Flink"
description = "A distributed processing engine enabling true stream processing."
# Same category string as the Apache Storm record.
category = "Stream Processing Engine"
# Year stored as a string, not an integer.
initialRelease = "2015"
# Descriptive magnitude only.
latency = "Milliseconds"
license = "Apache License 2.0"
# Free-text label; differs from Storm's "Stream Processing" by the leading
# "True", mirroring the wording in `description`.
processingType = "True Stream Processing"

# Record for Apache Hive, appended to the top-level `items` array.
[[items]]
# Numeric value stored as a string.
code = "5"
slug = "apache-hive"
name = "Apache Hive"
description = "Data warehouse software for running SQL-like queries on Hadoop."
category = "Data Warehouse"
# Year stored as a string, not an integer.
initialRelease = "2010"
# `latency` and `processingType` carry the same labels as the Apache Hadoop
# record.
latency = "Minutes to Hours"
license = "Apache License 2.0"
processingType = "Batch Processing"

# Record for Apache Storm, appended to the top-level `items` array.
[[items]]
# Numeric value stored as a string.
code = "6"
slug = "apache-storm"
name = "Apache Storm"
description = "A distributed real-time computation system."
# Same category string as the Apache Flink record.
category = "Stream Processing Engine"
# Year stored as a string, not an integer.
initialRelease = "2011"
# Descriptive magnitude only; no numeric bound is given.
latency = "Milliseconds"
license = "Apache License 2.0"
# Plain label with no qualifier, unlike the Kafka and Flink records.
processingType = "Stream Processing"

# Record for Apache HBase, appended to the top-level `items` array.
[[items]]
# Numeric value stored as a string.
code = "7"
slug = "apache-hbase"
name = "Apache HBase"
description = "A distributed NoSQL database running on Hadoop."
category = "NoSQL Database"
# Year stored as a string, not an integer.
# NOTE(review): HBase existed before 2010 (that appears to be the year it
# became a top-level Apache project) - confirm which milestone this field is
# meant to record.
initialRelease = "2010"
# Descriptive magnitude only.
latency = "Milliseconds"
license = "Apache License 2.0"
# Describes an access pattern rather than a batch/stream mode, unlike the
# other records' values for this key.
processingType = "Real-time Read/Write"

# Record for Trino, the distributed SQL query engine previously released as
# PrestoSQL; appended to the top-level `items` array.
[[items]]
# Numeric value stored as a string.
code = "8"
# Left unchanged in case consumers key lookups or URLs on it, even though it
# still carries the "apache-" prefix.
slug = "apache-presto-trino"
# Trino is not an Apache Software Foundation project, so the display name has
# no "Apache" prefix; only its license (below) is Apache 2.0.
name = "Trino (formerly PrestoSQL)"
description = "A distributed SQL query engine for large-scale data."
category = "Distributed SQL Query Engine"
# Year stored as a string, not an integer.
# NOTE(review): presumably refers to the original Presto project's start, not
# the Trino rename - confirm.
initialRelease = "2012"
# Free-text magnitude range rather than a measured number.
latency = "Seconds to Minutes"
license = "Apache License 2.0"
processingType = "Interactive Query"