XML
Big Data Technologies - XML
Big data technologies refer to a collection of technologies designed to efficiently process data whose volume, variety, and velocity exceed what traditional database management systems can handle. Centered around distributed processing frameworks such as Hadoop, Spark, Kafka, and Flink, these technologies form a comprehensive ecosystem for data collection, storage, processing, analysis, and visualization. They serve as the foundation of a modern data-driven society, enabling real-time analytics, machine learning, IoT data processing, and business intelligence.
Big Data
Distributed Processing
Hadoop
Spark
Kafka
Flink
Data Engineering
Stream Processing
Batch Processing
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Catalog of big data technologies.

  Each <item> is one technology record with these child elements, in order:
    code            numeric identifier of the record within this list
    slug            lowercase, hyphen-separated identifier
    name            display name
    description     one-sentence summary
    category        kind of system
    initialRelease  four-digit year
    latency         coarse latency class given as free text
    license         license name
    processingType  processing model given as free text

  A literal ampersand in element text must be written as &amp; because a bare
  ampersand makes the document ill-formed and parsers will reject it.
-->
<items>
  <item>
    <code>1</code>
    <slug>apache-hadoop</slug>
    <name>Apache Hadoop</name>
    <description>An open-source framework for distributed storage and batch processing.</description>
    <category>Distributed Storage &amp; Batch Processing</category>
    <initialRelease>2006</initialRelease>
    <latency>Minutes to Hours</latency>
    <license>Apache License 2.0</license>
    <processingType>Batch Processing</processingType>
  </item>
  <item>
    <code>2</code>
    <slug>apache-spark</slug>
    <name>Apache Spark</name>
    <description>A high-speed data processing engine using in-memory computation.</description>
    <category>General-Purpose Distributed Processing Engine</category>
    <initialRelease>2014</initialRelease>
    <latency>Seconds</latency>
    <license>Apache License 2.0</license>
    <processingType>Batch &amp; Stream Processing (Micro-batch)</processingType>
  </item>
  <item>
    <code>3</code>
    <slug>apache-kafka</slug>
    <name>Apache Kafka</name>
    <description>A high-throughput distributed streaming platform.</description>
    <category>Messaging &amp; Streaming Platform</category>
    <initialRelease>2011</initialRelease>
    <latency>Milliseconds</latency>
    <license>Apache License 2.0</license>
    <processingType>Stream Processing (Messaging)</processingType>
  </item>
  <item>
    <code>4</code>
    <slug>apache-flink</slug>
    <name>Apache Flink</name>
    <description>A distributed processing engine enabling true stream processing.</description>
    <category>Stream Processing Engine</category>
    <initialRelease>2015</initialRelease>
    <latency>Milliseconds</latency>
    <license>Apache License 2.0</license>
    <processingType>True Stream Processing</processingType>
  </item>
  <item>
    <code>5</code>
    <slug>apache-hive</slug>
    <name>Apache Hive</name>
    <description>Data warehouse software for running SQL-like queries on Hadoop.</description>
    <category>Data Warehouse</category>
    <initialRelease>2010</initialRelease>
    <latency>Minutes to Hours</latency>
    <license>Apache License 2.0</license>
    <processingType>Batch Processing</processingType>
  </item>
  <item>
    <code>6</code>
    <slug>apache-storm</slug>
    <name>Apache Storm</name>
    <description>A distributed real-time computation system.</description>
    <category>Stream Processing Engine</category>
    <initialRelease>2011</initialRelease>
    <latency>Milliseconds</latency>
    <license>Apache License 2.0</license>
    <processingType>Stream Processing</processingType>
  </item>
  <item>
    <code>7</code>
    <slug>apache-hbase</slug>
    <name>Apache HBase</name>
    <description>A distributed NoSQL database running on Hadoop.</description>
    <category>NoSQL Database</category>
    <initialRelease>2010</initialRelease>
    <latency>Milliseconds</latency>
    <license>Apache License 2.0</license>
    <processingType>Real-time Read/Write</processingType>
  </item>
  <!-- NOTE(review): Trino is released under the Apache License 2.0 but is not
       an Apache Software Foundation project. Confirm whether the "Apache"
       prefix in this record's name and slug is intended before relying on it. -->
  <item>
    <code>8</code>
    <slug>apache-presto-trino</slug>
    <name>Apache Trino (formerly PrestoSQL)</name>
    <description>A distributed SQL query engine for large-scale data.</description>
    <category>Distributed SQL Query Engine</category>
    <initialRelease>2012</initialRelease>
    <latency>Seconds to Minutes</latency>
    <license>Apache License 2.0</license>
    <processingType>Interactive Query</processingType>
  </item>
</items>