JSON

Big Data Technologies - JSON

Big data technologies are a collection of technologies designed to efficiently process high-volume, high-variety, and high-velocity data that traditional database management systems cannot handle. Centered on distributed processing frameworks such as Hadoop, Spark, Kafka, and Flink, these technologies form a comprehensive ecosystem for data collection, storage, processing, analysis, and visualization. They serve as the foundation of a modern data-driven society, enabling real-time analytics, machine learning, IoT data processing, and business intelligence.

Big Data, Distributed Processing, Hadoop, Spark, Kafka, Flink, Data Engineering, Stream Processing, Batch Processing
[
  {
    "code": "1",
    "slug": "apache-hadoop",
    "name": "Apache Hadoop",
    "description": "An open-source framework for distributed storage and batch processing.",
    "category": "Distributed Storage & Batch Processing",
    "processingType": "Batch Processing",
    "latency": "Minutes to Hours",
    "initialRelease": "2006",
    "license": "Apache License 2.0"
  },
  {
    "code": "2",
    "slug": "apache-spark",
    "name": "Apache Spark",
    "description": "A high-speed data processing engine using in-memory computation.",
    "category": "General-Purpose Distributed Processing Engine",
    "processingType": "Batch & Stream Processing (Micro-batch)",
    "latency": "Seconds",
    "initialRelease": "2014",
    "license": "Apache License 2.0"
  },
  {
    "code": "3",
    "slug": "apache-kafka",
    "name": "Apache Kafka",
    "description": "A high-throughput distributed streaming platform.",
    "category": "Messaging & Streaming Platform",
    "processingType": "Stream Processing (Messaging)",
    "latency": "Milliseconds",
    "initialRelease": "2011",
    "license": "Apache License 2.0"
  },
  {
    "code": "4",
    "slug": "apache-flink",
    "name": "Apache Flink",
    "description": "A distributed processing engine enabling true stream processing.",
    "category": "Stream Processing Engine",
    "processingType": "True Stream Processing",
    "latency": "Milliseconds",
    "initialRelease": "2015",
    "license": "Apache License 2.0"
  },
  {
    "code": "5",
    "slug": "apache-hive",
    "name": "Apache Hive",
    "description": "Data warehouse software for running SQL-like queries on Hadoop.",
    "category": "Data Warehouse",
    "processingType": "Batch Processing",
    "latency": "Minutes to Hours",
    "initialRelease": "2010",
    "license": "Apache License 2.0"
  },
  {
    "code": "6",
    "slug": "apache-storm",
    "name": "Apache Storm",
    "description": "A distributed real-time computation system.",
    "category": "Stream Processing Engine",
    "processingType": "Stream Processing",
    "latency": "Milliseconds",
    "initialRelease": "2011",
    "license": "Apache License 2.0"
  },
  {
    "code": "7",
    "slug": "apache-hbase",
    "name": "Apache HBase",
    "description": "A distributed NoSQL database running on Hadoop.",
    "category": "NoSQL Database",
    "processingType": "Real-time Read/Write",
    "latency": "Milliseconds",
    "initialRelease": "2010",
    "license": "Apache License 2.0"
  },
  {
    "code": "8",
    "slug": "apache-presto-trino",
    "name": "Trino (formerly PrestoSQL)",
    "description": "A distributed SQL query engine for large-scale data.",
    "category": "Distributed SQL Query Engine",
    "processingType": "Interactive Query",
    "latency": "Seconds to Minutes",
    "initialRelease": "2012",
    "license": "Apache License 2.0"
  }
]