JSON
Big Data Technologies - JSON
Big data technologies are a collection of technologies designed to efficiently process data whose volume, variety, and velocity exceed what traditional database management systems can handle. Centered on distributed processing frameworks such as Hadoop, Spark, Kafka, and Flink, these technologies form a comprehensive ecosystem for data collection, storage, processing, analysis, and visualization. They serve as the foundation of the modern data-driven society, enabling real-time analytics, machine learning, IoT data processing, and business intelligence.
Big Data
Distributed Processing
Hadoop
Spark
Kafka
Flink
Data Engineering
Stream Processing
Batch Processing
[
{
"code": "1",
"slug": "apache-hadoop",
"name": "Apache Hadoop",
"description": "An open-source framework for distributed storage and batch processing.",
"category": "Distributed Storage & Batch Processing",
"processingType": "Batch Processing",
"latency": "Minutes to Hours",
"initialRelease": "2006",
"license": "Apache License 2.0"
},
{
"code": "2",
"slug": "apache-spark",
"name": "Apache Spark",
"description": "A high-speed data processing engine using in-memory computation.",
"category": "General-Purpose Distributed Processing Engine",
"processingType": "Batch & Stream Processing (Micro-batch)",
"latency": "Seconds",
"initialRelease": "2014",
"license": "Apache License 2.0"
},
{
"code": "3",
"slug": "apache-kafka",
"name": "Apache Kafka",
"description": "A high-throughput distributed streaming platform.",
"category": "Messaging & Streaming Platform",
"processingType": "Stream Processing (Messaging)",
"latency": "Milliseconds",
"initialRelease": "2011",
"license": "Apache License 2.0"
},
{
"code": "4",
"slug": "apache-flink",
"name": "Apache Flink",
"description": "A distributed processing engine enabling true stream processing.",
"category": "Stream Processing Engine",
"processingType": "True Stream Processing",
"latency": "Milliseconds",
"initialRelease": "2015",
"license": "Apache License 2.0"
},
{
"code": "5",
"slug": "apache-hive",
"name": "Apache Hive",
"description": "Data warehouse software for running SQL-like queries on Hadoop.",
"category": "Data Warehouse",
"processingType": "Batch Processing",
"latency": "Minutes to Hours",
"initialRelease": "2010",
"license": "Apache License 2.0"
},
{
"code": "6",
"slug": "apache-storm",
"name": "Apache Storm",
"description": "A distributed real-time computation system.",
"category": "Stream Processing Engine",
"processingType": "Stream Processing",
"latency": "Milliseconds",
"initialRelease": "2011",
"license": "Apache License 2.0"
},
{
"code": "7",
"slug": "apache-hbase",
"name": "Apache HBase",
"description": "A distributed NoSQL database running on Hadoop.",
"category": "NoSQL Database",
"processingType": "Real-time Read/Write",
"latency": "Milliseconds",
"initialRelease": "2010",
"license": "Apache License 2.0"
},
{
"code": "8",
"slug": "apache-presto-trino",
"name": "Trino (formerly PrestoSQL)",
"description": "A distributed SQL query engine for large-scale data.",
"category": "Distributed SQL Query Engine",
"processingType": "Interactive Query",
"latency": "Seconds to Minutes",
"initialRelease": "2012",
"license": "Apache License 2.0"
}
]