<html><head><meta name="color-scheme" content="light dark"></head><body><pre style="word-wrap: break-word; white-space: pre-wrap;">"use strict";(self.webpackChunkdatabend=self.webpackChunkdatabend||[]).push([[4145],{3905:function(e,t,a){a.d(t,{Zo:function(){return u},kt:function(){return d}});var n=a(67294);function r(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function i(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&amp;&amp;(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,n)}return a}function o(e){for(var t=1;t&lt;arguments.length;t++){var a=null!=arguments[t]?arguments[t]:{};t%2?i(Object(a),!0).forEach((function(t){r(e,t,a[t])})):Object.getOwnPropertyDescriptors?Object.defineProperties(e,Object.getOwnPropertyDescriptors(a)):i(Object(a)).forEach((function(t){Object.defineProperty(e,t,Object.getOwnPropertyDescriptor(a,t))}))}return e}function l(e,t){if(null==e)return{};var a,n,r=function(e,t){if(null==e)return{};var a,n,r={},i=Object.keys(e);for(n=0;n&lt;i.length;n++)a=i[n],t.indexOf(a)&gt;=0||(r[a]=e[a]);return r}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(n=0;n&lt;i.length;n++)a=i[n],t.indexOf(a)&gt;=0||Object.prototype.propertyIsEnumerable.call(e,a)&amp;&amp;(r[a]=e[a])}return r}var s=n.createContext({}),p=function(e){var t=n.useContext(s),a=t;return e&amp;&amp;(a="function"==typeof e?e(t):o(o({},t),e)),a},u=function(e){var t=p(e.components);return n.createElement(s.Provider,{value:t},e.children)},c={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},m=n.forwardRef((function(e,t){var a=e.components,r=e.mdxType,i=e.originalType,s=e.parentName,u=l(e,["components","mdxType","originalType","parentName"]),m=p(a),d=r,k=m["".concat(s,".").concat(d)]||m[d]||c[d]||i;return a?n.createElement(k,o(o({ref:t},u),{},{components:a})):n.createElement(k,o({ref:t},u))}));function d(e,t){var a=arguments,r=t&amp;&amp;t.mdxType;if("string"==typeof e||r){var i=a.length,o=new Array(i);o[0]=m;var l={};for(var s in t)hasOwnProperty.call(t,s)&amp;&amp;(l[s]=t[s]);l.originalType=e,l.mdxType="string"==typeof e?e:r,o[1]=l;for(var p=2;p&lt;i;p++)o[p]=a[p];return n.createElement.apply(null,o)}return n.createElement.apply(null,a)}m.displayName="MDXCreateElement"},94212:function(e,t,a){a.r(t),a.d(t,{assets:function(){return u},contentTitle:function(){return s},default:function(){return d},frontMatter:function(){return l},metadata:function(){return p},toc:function(){return c}});var n=a(83117),r=a(80102),i=(a(67294),a(3905)),o=["components"],l={title:"What is Databend?",slug:"../"},s=void 0,p={unversionedId:"overview/index",id:"overview/index",title:"What is Databend?",description:"Databend is an open source Elastic and Workload-Aware Modern Cloud Data Warehouse written in Rust from scratch.",source:"@site/../docs/doc/00-overview/index.md",sourceDirName:"00-overview",slug:"/",permalink:"/doc/",editUrl:"https://github.com/datafuselabs/databend/edit/main/databend/../docs/doc/00-overview/index.md",tags:[],version:"current",frontMatter:{title:"What is Databend?",slug:"../"},sidebar:"docs",next:{title:"Deploy",permalink:"/doc/deploy"}},u={},c=[{value:"Design Overview",id:"design-overview",level:2},{value:"Meta Service Layer",id:"meta-service-layer",level:3},{value:"Compute Layer",id:"compute-layer",level:3},{value:"Storage Layer",id:"storage-layer",level:3},{value:"Getting Started",id:"getting-started",level:2},{value:"Community",id:"community",level:2},{value:"Roadmap",id:"roadmap",level:2},{value:"License",id:"license",level:2},{value:"Acknowledgments",id:"acknowledgments",level:2}],m={toc:c};function d(e){var t=e.components,a=(0,r.Z)(e,o);return(0,i.kt)("wrapper",(0,n.Z)({},m,a,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("p",null,"Databend is an open source ",(0,i.kt)("strong",{parentName:"p"},"Elastic")," and ",(0,i.kt)("strong",{parentName:"p"},"Workload-Aware")," Modern Cloud Data Warehouse written in Rust from scratch."),(0,i.kt)("p",null,"Databend uses the latest techniques in vectorized query processing to allow you to do blazing-fast data analytics on Object Storage."),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("p",{parentName:"li"},(0,i.kt)("strong",{parentName:"p"},"Instant Elasticity")),(0,i.kt)("p",{parentName:"li"},"Databend separates the storage and compute, which allows you easily scale up or scale down based on your application's needs.")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("p",{parentName:"li"},(0,i.kt)("strong",{parentName:"p"},"Blazing Performance")),(0,i.kt)("p",{parentName:"li"},"Databend leverages data-level parallelism(Vectorized Query Execution) and instruction-level parallelism(SIMD) technology, offers blazing performance data analytics.")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("p",{parentName:"li"},(0,i.kt)("strong",{parentName:"p"},"Support for Semi-Structured Data")),(0,i.kt)("p",{parentName:"li"},"Databend supports ",(0,i.kt)("a",{parentName:"p",href:"https://databend.rs/doc/load-data"},"ingestion of semi-structured data")," in various formats like CSV, JSON and Parquet which located in cloud or your local file system; Databend also supports semi-structured data types:",(0,i.kt)("a",{parentName:"p",href:"https://databend.rs/doc/reference/data-types/data-type-semi-structured-types"},"VARIANT,OBJECT,ARRAY"),", which is easy to import and operate on semi-structured data (JSON).")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("p",{parentName:"li"},(0,i.kt)("strong",{parentName:"p"},"MySQL/ClickHouse Compatible")),(0,i.kt)("p",{parentName:"li"},"Databend is ANSI SQL compliant and MySQL/ClickHouse wire protocol compatible, making it easy to connect with existing tools.")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("p",{parentName:"li"},(0,i.kt)("strong",{parentName:"p"},"Easy to Use")),(0,i.kt)("p",{parentName:"li"},"Databend has no indexes to build, no manual tuning required, no manual figuring out partitions or shard data, it\u2019s all done for you as data is loaded into table."))),(0,i.kt)("h2",{id:"design-overview"},"Design Overview"),(0,i.kt)("p",null,"This is the high-level architecture of Databend, it consists of three components:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"meta service layer")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"compute layer")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"storage layer"))),(0,i.kt)("p",null,(0,i.kt)("img",{parentName:"p",src:"https://datafuse-1253727613.cos.ap-hongkong.myqcloud.com/arch/datafuse-arch-20210817.svg",alt:"Databend Architecture"})),(0,i.kt)("h3",{id:"meta-service-layer"},"Meta Service Layer"),(0,i.kt)("p",null,"The meta service is a layer to service multiple tenants. This layer implements a persistent key-value store to store each tenant's state.\nIn current implementation, the meta service has many components:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"Metadata, which manages all metadata of databases, tables, clusters, the transaction, etc."),(0,i.kt)("li",{parentName:"ul"},"Administration, which stores user info, user management, access control information, usage statistics, etc."),(0,i.kt)("li",{parentName:"ul"},"Security, which performs authorization and authentication to protect the privacy of users' data.")),(0,i.kt)("p",null,"The code of ",(0,i.kt)("inlineCode",{parentName:"p"},"Meta Service Layer")," mainly resides in the ",(0,i.kt)("inlineCode",{parentName:"p"},"metasrv")," directory of the repository."),(0,i.kt)("h3",{id:"compute-layer"},"Compute Layer"),(0,i.kt)("p",null,"The compute layer is the layer to carry out computation for query processing. This layer may consist of many clusters,\nand each cluster may consist of many nodes. Each node is a compute unit, and is a collection of components:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("p",{parentName:"li"},(0,i.kt)("strong",{parentName:"p"},"Planner")),(0,i.kt)("p",{parentName:"li"},"The query planner builds an execution plan from the user's SQL statement and represents the query with different types of relational operators (such as ",(0,i.kt)("inlineCode",{parentName:"p"},"Projection"),", ",(0,i.kt)("inlineCode",{parentName:"p"},"Filter"),", ",(0,i.kt)("inlineCode",{parentName:"p"},"Limit"),", etc.)."),(0,i.kt)("p",{parentName:"li"},"For example:"),(0,i.kt)("pre",{parentName:"li"},(0,i.kt)("code",{parentName:"pre"},"databend :) EXPLAIN SELECT avg(number) FROM numbers(100000) GROUP BY number % 3\n\u250c\u2500explain\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510\n\u2502 Projection: avg(number):Float64                                                                                                                                                         \u2502\n\u2502   AggregatorFinal: groupBy=[[(number % 3)]], aggr=[[avg(number)]]                                                                                                                       \u2502\n\u2502     AggregatorPartial: groupBy=[[(number % 3)]], aggr=[[avg(number)]]                                                                                                                   \u2502\n\u2502       Expression: (number % 3):UInt8, number:UInt64 (Before GroupBy)                                                                                                                    \u2502\n\u2502         ReadDataSource: scan schema: [number:UInt64], statistics: [read_rows: 100000, read_bytes: 800000, partitions_scanned: 11, partitions_total: 11], push_downs: [projections: [0]] \u2502\n\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n"))),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("p",{parentName:"li"},(0,i.kt)("strong",{parentName:"p"},"Optimizer")),(0,i.kt)("p",{parentName:"li"},"A rule based optimizer, some rules like predicate push down or pruning of unused columns.")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("p",{parentName:"li"},(0,i.kt)("strong",{parentName:"p"},"Processors")),(0,i.kt)("p",{parentName:"li"},"A Pull&amp;Push-Based query execution pipeline, which is built by planner instructions.\nEach pipeline executor is a processor(such as ",(0,i.kt)("inlineCode",{parentName:"p"},"SourceTransform"),", ",(0,i.kt)("inlineCode",{parentName:"p"},"FilterTransform"),", etc.), it has zero or more inputs and zero or more outputs, and connected as a pipeline, it also can be distributed on multiple nodes judged by your query workload."),(0,i.kt)("p",{parentName:"li"},"For example:"),(0,i.kt)("pre",{parentName:"li"},(0,i.kt)("code",{parentName:"pre"},"databend :) EXPLAIN PIPELINE SELECT avg(number) FROM numbers(100000) GROUP BY number % 3\n\u250c\u2500explain\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510\n\u2502 ProjectionTransform \xd7 16 processors                                                    \u2502\n\u2502   Mixed (GroupByFinalTransform \xd7 1 processor) to (ProjectionTransform \xd7 16 processors) \u2502\n\u2502     GroupByFinalTransform \xd7 1 processor                                                \u2502\n\u2502       Merge (GroupByPartialTransform \xd7 16 processors) to (GroupByFinalTransform \xd7 1)   \u2502\n\u2502         GroupByPartialTransform \xd7 16 processors                                        \u2502\n\u2502           ExpressionTransform \xd7 16 processors                                          \u2502\n\u2502             SourceTransform \xd7 16 processors                                            \u2502\n\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n")))),(0,i.kt)("p",null,"Node is the smallest unit of the compute layer. A set of nodes can be registered as one cluster via namespace.\nMany clusters can attach the same database, so they can serve the query in parallel by different users.\nWhen you add new nodes to a cluster, the currently running computational tasks can be scaled(known as work-stealing) guarantee."),(0,i.kt)("p",null,"The ",(0,i.kt)("inlineCode",{parentName:"p"},"Compute Layer")," codes mainly in the ",(0,i.kt)("inlineCode",{parentName:"p"},"query")," directory."),(0,i.kt)("h3",{id:"storage-layer"},"Storage Layer"),(0,i.kt)("p",null,"Databend stores data in an efficient, columnar format as Parquet files.\nEach Parquet file is sorted by the primary key before being written to the underlying shared storage.\nFor efficient pruning, Databend also creates indexes for each Parquet file:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"min_max.idx")," The index file stores the ",(0,i.kt)("em",{parentName:"li"},"minimum")," and ",(0,i.kt)("em",{parentName:"li"},"maximum")," value of this Parquet file."),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"sparse.idx")," The index file store the &lt;key, parquet-page&gt; mapping for every ","[N]"," records granularity.")),(0,i.kt)("p",null,"With the indexes, we can speed up the queries by reducing the I/O and CPU cost.\nImagine that Parquet file f1 has ",(0,i.kt)("inlineCode",{parentName:"p"},"min_max.idx")," of ",(0,i.kt)("inlineCode",{parentName:"p"},"[3, 5)")," and Parquet file f2 has ",(0,i.kt)("inlineCode",{parentName:"p"},"min_max.idx")," of ",(0,i.kt)("inlineCode",{parentName:"p"},"[4, 6)")," in column ",(0,i.kt)("inlineCode",{parentName:"p"},"x"),", if the query predicate is ",(0,i.kt)("inlineCode",{parentName:"p"},"WHERE x &lt; 4"),", only f1 needs to be accessed and processed."),(0,i.kt)("h2",{id:"getting-started"},"Getting Started"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"/doc/deploy"},"Databend Deploy")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"/doc/develop"},"Databend Develop")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"/doc/contributing"},"Databend Contributing")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"/doc/performance"},"Databend Performance")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://perf.databend.rs"},"Databend Continuous Benchmarking"))),(0,i.kt)("h2",{id:"community"},"Community"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://join.slack.com/t/datafusecloud/shared_invite/zt-nojrc9up-50IRla1Y1h56rqwCTkkDJA"},"Slack")," (For live discussion with the Community)"),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://github.com/datafuselabs/databend"},"Github")," (Feature/Bug reports, Contributions)"),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://twitter.com/Datafuse_Labs"},"Twitter")," (Get the news fast)"),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://weekly.databend.rs/"},"Weekly")," (A weekly newsletter about the Databend)")),(0,i.kt)("h2",{id:"roadmap"},"Roadmap"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://github.com/datafuselabs/databend/issues/4591"},"Roadmap v0.8")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://github.com/datafuselabs/databend/issues/3706"},"Roadmap 2022"))),(0,i.kt)("h2",{id:"license"},"License"),(0,i.kt)("p",null,"Databend is licensed under Apache 2.0."),(0,i.kt)("h2",{id:"acknowledgments"},"Acknowledgments"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"Databend is inspired by ",(0,i.kt)("a",{parentName:"li",href:"https://github.com/clickhouse/clickhouse"},"ClickHouse")," and ",(0,i.kt)("a",{parentName:"li",href:"https://docs.snowflake.com/en/user-guide/intro-key-concepts.html#snowflake-architecture"},"Snowflake"),", its computing model is based on ",(0,i.kt)("a",{parentName:"li",href:"https://arrow.apache.org/"},"apache-arrow"),"."),(0,i.kt)("li",{parentName:"ul"},"The ",(0,i.kt)("a",{parentName:"li",href:"https://databend.rs"},"documentation website")," hosted by ",(0,i.kt)("a",{parentName:"li",href:"https://vercel.com/?utm_source=databend&amp;utm_campaign=oss"},"Vercel"),".")))}d.isMDXComponent=!0}}]);</pre></body></html>