From 506652039e786b57d9ef7fac1aa22e20853c571f Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Thu, 18 Aug 2022 13:34:59 +0200 Subject: [PATCH 01/13] hdfs getting-started --- docs/antora.yml | 1 + docs/modules/ROOT/nav.adoc | 1 - docs/modules/ROOT/pages/installation.adoc | 57 ------- .../examples/code/getting_started.sh | 149 ++++++++++++++++++ .../examples/code/getting_started.sh.j2 | 149 ++++++++++++++++++ .../getting_started/examples/code/hdfs.yaml | 27 +--- .../examples/code/testdata.txt | 3 + .../examples/code/webhdfs.yaml | 23 +++ .../getting_started/examples/code/zk.yaml | 15 ++ .../getting_started/examples/code/znode.yaml | 8 + docs/modules/getting_started/nav.adoc | 3 + .../getting_started/pages/first_steps.adoc | 136 ++++++++++++++++ docs/modules/getting_started/pages/index.adoc | 18 +++ .../getting_started/pages/installation.adoc | 54 +++++++ docs/templating_vars.yaml | 9 ++ scripts/docs_templating.sh | 41 +++++ 16 files changed, 611 insertions(+), 83 deletions(-) delete mode 100644 docs/modules/ROOT/pages/installation.adoc create mode 100755 docs/modules/getting_started/examples/code/getting_started.sh create mode 100755 docs/modules/getting_started/examples/code/getting_started.sh.j2 rename examples/simple-hdfs-cluster.yaml => docs/modules/getting_started/examples/code/hdfs.yaml (76%) create mode 100755 docs/modules/getting_started/examples/code/testdata.txt create mode 100644 docs/modules/getting_started/examples/code/webhdfs.yaml create mode 100644 docs/modules/getting_started/examples/code/zk.yaml create mode 100644 docs/modules/getting_started/examples/code/znode.yaml create mode 100644 docs/modules/getting_started/nav.adoc create mode 100644 docs/modules/getting_started/pages/first_steps.adoc create mode 100644 docs/modules/getting_started/pages/index.adoc create mode 100644 docs/modules/getting_started/pages/installation.adoc create mode 100644 docs/templating_vars.yaml create mode 100755 scripts/docs_templating.sh diff --git a/docs/antora.yml b/docs/antora.yml index a43d455d..4787f226 100644 --- a/docs/antora.yml +++ b/docs/antora.yml @@ -2,5 +2,6 @@ name: hdfs version: "nightly" title: Stackable Operator for Apache HDFS nav: + - modules/getting_started/nav.adoc - modules/ROOT/nav.adoc prerelease: true diff --git a/docs/modules/ROOT/nav.adoc b/docs/modules/ROOT/nav.adoc index ac48abcd..2a7fca20 100644 --- a/docs/modules/ROOT/nav.adoc +++ b/docs/modules/ROOT/nav.adoc @@ -1,4 +1,3 @@ -* xref:installation.adoc[] * xref:configuration.adoc[] * xref:usage.adoc[] * xref:implementation.adoc[] diff --git a/docs/modules/ROOT/pages/installation.adoc b/docs/modules/ROOT/pages/installation.adoc deleted file mode 100644 index 9159ebe5..00000000 --- a/docs/modules/ROOT/pages/installation.adoc +++ /dev/null @@ -1,57 +0,0 @@ -= Installation - -There are two ways to run the HDFS Operator: - -1. Using Helm. - -2. Build from source. - -== Using Helm -Helm allows you to download and deploy Stackable operators on Kubernetes and is by far the easiest installation method. First ensure that you have installed the Stackable Operators Helm repository: - -[source,bash] ----- -helm repo add stackable https://repo.stackable.tech/repository/helm-dev/ -helm repo update stackable ----- -Then install the Stackable Operator for Apache Hadoop - -[source,bash] ----- -helm install hdfs-operator stackable/hdfs-operator ----- - -Helm will deploy the operator in a Kubernetes container and apply the CRDs for the Apache Hdfs service. 
You're now ready to deploy Apache Hdfs in Kubernetes as described in xref:usage.adoc[Usage]. - -== Build from source - -For development, testing and debugging purposes it is useful to be able to deploy a locally modified operator without the need to publish a container image and/or a helm chart. - -Requirements: -* A recent Rust toolchain to compile the sources. Version 1.58 is the latest at the time of this writing. -* Docker to build the image. -* Optionally a local Kubernetes cluster like `kind` to run the operator and the Hdfs cluster. - -[source,bash] ----- -cargo build -cp target/debug/stackable-hdfs-operator . -docker build -t docker.stackable.tech/stackable/hdfs-operator:0.3.0-nightly -f docker/Dockerfile.devel . -rm stackable-hdfs-operator ----- - -The image can then be loaded in local `kind` cluster like this: - -[source,bash] ----- -kind load docker-image docker.stackable.tech/stackable/hdfs-operator:0.3.0-nightly --name hdfs ----- -and the operator can be deployed by using the local Helm chart: - -[source,bash] ----- -helm install hdfs-operator deploy/helm/hdfs-operator/ ----- - - -Now you can proceed to install a custom Apache Hdfs cluster as described in xref:usage.adoc[Usage]. \ No newline at end of file diff --git a/docs/modules/getting_started/examples/code/getting_started.sh b/docs/modules/getting_started/examples/code/getting_started.sh new file mode 100755 index 00000000..652ee62c --- /dev/null +++ b/docs/modules/getting_started/examples/code/getting_started.sh @@ -0,0 +1,149 @@ +#!/usr/bin/env bash +set -euo pipefail + +# This script contains all the code snippets from the guide, as well as some assert tests +# to test if the instructions in the guide work. The user *could* use it, but it is intended +# for testing only. +# The script will install the operators, create a superset instance and briefly open a port +# forward and connect to the superset instance to make sure it is up and running. +# No running processes are left behind (i.e. the port-forwarding is closed at the end) + +if [ $# -eq 0 ] +then + echo "Installation method argument ('helm' or 'stackablectl') required." + exit 1 +fi + +case "$1" in +"helm") +echo "Adding 'stackable-dev' Helm Chart repository" +# tag::helm-add-repo[] +helm repo add stackable-dev https://repo.stackable.tech/repository/helm-dev/ +# end::helm-add-repo[] +echo "Installing Operators with Helm" +# tag::helm-install-operators[] +helm install --wait zookeeper-operator stackable-dev/zookeeper-operator --version 0.11.0-nightly +helm install --wait hdfs-operator stackable-dev/hdfs-operator --version 0.5.0-nightly +helm install --wait commons-operator stackable-dev/commons-operator --version 0.3.0-nightly +helm install --wait secret-operator stackable-dev/secret-operator --version 0.6.0-nightly +# end::helm-install-operators[] +;; +"stackablectl") +echo "installing Operators with stackablectl" +# tag::stackablectl-install-operators[] +stackablectl operator install \ + commons=0.3.0-nightly \ + secret=0.6.0-nightly \ + zookeeper=0.11.0-nightly \ + hdfs=0.5.0-nightly +# end::stackablectl-install-operators[] +;; +*) +echo "Need to give 'helm' or 'stackablectl' as an argument for which installation method to use!" 
+exit 1 +;; +esac + +echo "Creating Zookeeper cluster" +# tag::install-zk[] +kubectl apply -f zk.yaml +# end::install-zk[] + +echo "Creating ZNode" +# tag::install-zk[] +kubectl apply -f znode.yaml +# end::install-zk[] + +sleep 5 + +echo "Awaiting Zookeeper rollout finish" +# tag::watch-zk-rollout[] +kubectl rollout status --watch statefulset/simple-zk-server-default +# end::watch-zk-rollout[] + +echo "Creating HDFS cluster" +# tag::install-hdfs[] +kubectl apply -f hdfs.yaml +# end::install-hdfs[] + +sleep 5 + +echo "Awaiting HDFS rollout finish" +# tag::watch-hdfs-rollout[] +kubectl rollout status --watch statefulset/simple-hdfs-datanode-default +kubectl rollout status --watch statefulset/simple-hdfs-namenode-default +kubectl rollout status --watch statefulset/simple-hdfs-journalnode-default +# end::watch-hdfs-rollout[] + +echo "Creating Helper" +# tag::install-webhdfs[] +kubectl apply -f webhdfs.yaml +# end::install-webhdfs[] + +sleep 5 + +echo "Awaiting helper rollout finish" +# tag::watch-helper-rollout[] +kubectl rollout status --watch statefulset/webhdfs +# end::watch-helper-rollout[] + +file_status() { + # tag::file-status[] + kubectl exec -n default webhdfs-0 -- curl -s -XGET "http://simple-hdfs-namenode-default-0:9870/webhdfs/v1/?op=LISTSTATUS" + # end::file-status[] +} + +echo "Confirm that HDFS is empty..." +status=$(file_status | jq -r '.FileStatuses.FileStatus') + +if [ "$status" == "[]" ]; then + echo "As expected, HDFS is empty" +else + echo "Detected status: $status" + exit 1 +fi + +echo "Copy test file" +# tag::copy-file[] +kubectl cp -n default ./testdata.txt webhdfs-0:/tmp +# end::copy-file[] + +create_file() { + # tag::create-file[] + kubectl exec -n default webhdfs-0 -- \ + curl -s -XPUT -T /tmp/testdata.txt "http://simple-hdfs-namenode-default-0:9870/webhdfs/v1/testdata.txt?user.name=stackable&op=CREATE&noredirect=true" + # end::create-file[] +} + +location=$(create_file | jq -r '.Location') + +echo "Redirect location: $location" + +create_redirected() { + # tag::create-file-redirected[] + kubectl exec -n default webhdfs-0 -- curl -s -XPUT -T /tmp/testdata.txt "$location" + # end::create-file-redirected[] +} + +redirected=$(create_redirected) + +echo "Confirm that HDFS is *not* empty..." +found_file=$(file_status | jq -r '.FileStatuses.FileStatus[0].pathSuffix') +echo "Created file: $found_file with status $(file_status)" + +echo "Delete file" +delete_file() { + # tag::delete-file[] + kubectl exec -n default webhdfs-0 -- curl -s -XDELETE "http://simple-hdfs-namenode-default-0:9870/webhdfs/v1/testdata.txt?user.name=stackable&op=DELETE" + # end::delete-file[] +} + +deleted=$(delete_file | jq -r '.boolean') + +if [ "$deleted" == "true" ]; then + echo "File was deleted!" +else + echo "Detected status: $deleted" + exit 1 +fi + diff --git a/docs/modules/getting_started/examples/code/getting_started.sh.j2 b/docs/modules/getting_started/examples/code/getting_started.sh.j2 new file mode 100755 index 00000000..798b78f1 --- /dev/null +++ b/docs/modules/getting_started/examples/code/getting_started.sh.j2 @@ -0,0 +1,149 @@ +#!/usr/bin/env bash +set -euo pipefail + +# This script contains all the code snippets from the guide, as well as some assert tests +# to test if the instructions in the guide work. The user *could* use it, but it is intended +# for testing only. +# The script will install the operators, create a superset instance and briefly open a port +# forward and connect to the superset instance to make sure it is up and running. 
+# No running processes are left behind (i.e. the port-forwarding is closed at the end) + +if [ $# -eq 0 ] +then + echo "Installation method argument ('helm' or 'stackablectl') required." + exit 1 +fi + +case "$1" in +"helm") +echo "Adding 'stackable-dev' Helm Chart repository" +# tag::helm-add-repo[] +helm repo add stackable-dev https://repo.stackable.tech/repository/helm-dev/ +# end::helm-add-repo[] +echo "Installing Operators with Helm" +# tag::helm-install-operators[] +helm install --wait zookeeper-operator stackable-dev/zookeeper-operator --version {{ versions.zookeeper }} +helm install --wait hdfs-operator stackable-dev/hdfs-operator --version {{ versions.hdfs }} +helm install --wait commons-operator stackable-dev/commons-operator --version {{ versions.commons }} +helm install --wait secret-operator stackable-dev/secret-operator --version {{ versions.secret }} +# end::helm-install-operators[] +;; +"stackablectl") +echo "installing Operators with stackablectl" +# tag::stackablectl-install-operators[] +stackablectl operator install \ + commons={{ versions.commons }} \ + secret={{ versions.secret }} \ + zookeeper={{ versions.zookeeper }} \ + hdfs={{ versions.hdfs }} +# end::stackablectl-install-operators[] +;; +*) +echo "Need to give 'helm' or 'stackablectl' as an argument for which installation method to use!" +exit 1 +;; +esac + +echo "Creating Zookeeper cluster" +# tag::install-zk[] +kubectl apply -f zk.yaml +# end::install-zk[] + +echo "Creating ZNode" +# tag::install-zk[] +kubectl apply -f znode.yaml +# end::install-zk[] + +sleep 5 + +echo "Awaiting Zookeeper rollout finish" +# tag::watch-zk-rollout[] +kubectl rollout status --watch statefulset/simple-zk-server-default +# end::watch-zk-rollout[] + +echo "Creating HDFS cluster" +# tag::install-hdfs[] +kubectl apply -f hdfs.yaml +# end::install-hdfs[] + +sleep 5 + +echo "Awaiting HDFS rollout finish" +# tag::watch-hdfs-rollout[] +kubectl rollout status --watch statefulset/simple-hdfs-datanode-default +kubectl rollout status --watch statefulset/simple-hdfs-namenode-default +kubectl rollout status --watch statefulset/simple-hdfs-journalnode-default +# end::watch-hdfs-rollout[] + +echo "Creating Helper" +# tag::install-webhdfs[] +kubectl apply -f webhdfs.yaml +# end::install-webhdfs[] + +sleep 5 + +echo "Awaiting helper rollout finish" +# tag::watch-helper-rollout[] +kubectl rollout status --watch statefulset/webhdfs +# end::watch-helper-rollout[] + +file_status() { + # tag::file-status[] + kubectl exec -n default webhdfs-0 -- curl -s -XGET "http://simple-hdfs-namenode-default-0:9870/webhdfs/v1/?op=LISTSTATUS" + # end::file-status[] +} + +echo "Confirm that HDFS is empty..." 
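+# LISTSTATUS responses have the shape {"FileStatuses":{"FileStatus":[...]}}; for an empty
+# root directory the FileStatus array is empty, so the jq filter below prints "[]".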
+status=$(file_status | jq -r '.FileStatuses.FileStatus') + +if [ "$status" == "[]" ]; then + echo "As expected, HDFS is empty" +else + echo "Detected status: $status" + exit 1 +fi + +echo "Copy test file" +# tag::copy-file[] +kubectl cp -n default ./testdata.txt webhdfs-0:/tmp +# end::copy-file[] + +create_file() { + # tag::create-file[] + kubectl exec -n default webhdfs-0 -- \ + curl -s -XPUT -T /tmp/testdata.txt "http://simple-hdfs-namenode-default-0:9870/webhdfs/v1/testdata.txt?user.name=stackable&op=CREATE&noredirect=true" + # end::create-file[] +} + +location=$(create_file | jq -r '.Location') + +echo "Redirect location: $location" + +create_redirected() { + # tag::create-file-redirected[] + kubectl exec -n default webhdfs-0 -- curl -s -XPUT -T /tmp/testdata.txt "$location" + # end::create-file-redirected[] +} + +redirected=$(create_redirected) + +echo "Confirm that HDFS is *not* empty..." +found_file=$(file_status | jq -r '.FileStatuses.FileStatus[0].pathSuffix') +echo "Created file: $found_file with status $(file_status)" + +echo "Delete file" +delete_file() { + # tag::delete-file[] + kubectl exec -n default webhdfs-0 -- curl -s -XDELETE "http://simple-hdfs-namenode-default-0:9870/webhdfs/v1/testdata.txt?user.name=stackable&op=DELETE" + # end::delete-file[] +} + +deleted=$(delete_file | jq -r '.boolean') + +if [ "$deleted" == "true" ]; then + echo "File was deleted!" +else + echo "Detected status: $deleted" + exit 1 +fi + diff --git a/examples/simple-hdfs-cluster.yaml b/docs/modules/getting_started/examples/code/hdfs.yaml similarity index 76% rename from examples/simple-hdfs-cluster.yaml rename to docs/modules/getting_started/examples/code/hdfs.yaml index df4ff651..4d7ec233 100644 --- a/examples/simple-hdfs-cluster.yaml +++ b/docs/modules/getting_started/examples/code/hdfs.yaml @@ -1,27 +1,4 @@ --- -apiVersion: zookeeper.stackable.tech/v1alpha1 -kind: ZookeeperCluster -metadata: - name: simple-zk -spec: - version: 3.5.8-stackable0.7.0 - servers: - roleGroups: - default: - selector: - matchLabels: - kubernetes.io/os: linux - replicas: 3 - config: {} ---- -apiVersion: zookeeper.stackable.tech/v1alpha1 -kind: ZookeeperZnode -metadata: - name: simple-hdfs-znode -spec: - clusterRef: - name: simple-zk ---- apiVersion: hdfs.stackable.tech/v1alpha1 kind: HdfsCluster metadata: @@ -67,7 +44,7 @@ spec: selector: matchLabels: kubernetes.io/os: linux - replicas: 3 + replicas: 1 journalNodes: config: resources: @@ -81,4 +58,4 @@ spec: selector: matchLabels: kubernetes.io/os: linux - replicas: 3 + replicas: 1 diff --git a/docs/modules/getting_started/examples/code/testdata.txt b/docs/modules/getting_started/examples/code/testdata.txt new file mode 100755 index 00000000..d63c3679 --- /dev/null +++ b/docs/modules/getting_started/examples/code/testdata.txt @@ -0,0 +1,3 @@ +What is Lorem Ipsum? + +Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum. 
diff --git a/docs/modules/getting_started/examples/code/webhdfs.yaml b/docs/modules/getting_started/examples/code/webhdfs.yaml new file mode 100644 index 00000000..7ab34684 --- /dev/null +++ b/docs/modules/getting_started/examples/code/webhdfs.yaml @@ -0,0 +1,23 @@ +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: webhdfs + labels: + app: webhdfs +spec: + replicas: 1 + serviceName: webhdfs-svc + selector: + matchLabels: + app: webhdfs + template: + metadata: + labels: + app: webhdfs + spec: + containers: + - name: webhdfs + image: docker.stackable.tech/stackable/testing-tools:0.1.0-stackable0.1.0 + stdin: true + tty: true diff --git a/docs/modules/getting_started/examples/code/zk.yaml b/docs/modules/getting_started/examples/code/zk.yaml new file mode 100644 index 00000000..78eb9b8c --- /dev/null +++ b/docs/modules/getting_started/examples/code/zk.yaml @@ -0,0 +1,15 @@ +--- +apiVersion: zookeeper.stackable.tech/v1alpha1 +kind: ZookeeperCluster +metadata: + name: simple-zk +spec: + version: 3.8.0-stackable0.7.1 + servers: + roleGroups: + default: + selector: + matchLabels: + kubernetes.io/os: linux + replicas: 1 + config: {} diff --git a/docs/modules/getting_started/examples/code/znode.yaml b/docs/modules/getting_started/examples/code/znode.yaml new file mode 100644 index 00000000..8b340ac1 --- /dev/null +++ b/docs/modules/getting_started/examples/code/znode.yaml @@ -0,0 +1,8 @@ +--- +apiVersion: zookeeper.stackable.tech/v1alpha1 +kind: ZookeeperZnode +metadata: + name: simple-hdfs-znode +spec: + clusterRef: + name: simple-zk diff --git a/docs/modules/getting_started/nav.adoc b/docs/modules/getting_started/nav.adoc new file mode 100644 index 00000000..4fdd6221 --- /dev/null +++ b/docs/modules/getting_started/nav.adoc @@ -0,0 +1,3 @@ +* xref:index.adoc[] +** xref:installation.adoc[] +** xref:first_steps.adoc[] \ No newline at end of file diff --git a/docs/modules/getting_started/pages/first_steps.adoc b/docs/modules/getting_started/pages/first_steps.adoc new file mode 100644 index 00000000..2f4a2ec2 --- /dev/null +++ b/docs/modules/getting_started/pages/first_steps.adoc @@ -0,0 +1,136 @@ += First steps + +Once you have followed the steps in the xref:installation.adoc[] section to install the operator and its dependencies, you will now deploy an HDFS cluster and its dependencies. Afterwards you can <<_verify_that_it_works, verify that it works>> by creating, verifying and deleting a test file in HDFS. + +== Setup + +=== Zookeeper + +To deploy a Zookeeper cluster create one file called `zk.yaml`: + +[source,yaml] +include::example$code/zk.yaml[] + +We also need to define a `ZNode` that will be used by the HDFS cluster to reference Zookeeper. Create another file called `znode.yaml`: + +[source,yaml] +include::example$code/znode.yaml[] + +Apply both of these files: + +[source] +include::example$code/getting_started.sh[tag=install-zk] + +The state of the Zookeeper cluster can be tracked with `kubectl`: + +[source] +include::example$code/getting_started.sh[tag=watch-zk-rollout] + +=== HDFS + +An HDFS cluster has three components: then `namenode`, the `datanode` and the `journalnode`. 
Create a file named `hdfs.yaml` defining 2 `namenodes` and one `datanode` and `journalnode` each:
+
+[source,yaml]
+----
+include::example$code/hdfs.yaml[]
+----
+
+Where:
+
+- `metadata.name` contains the name of the HDFS cluster
+- the label of the Docker image provided by Stackable must be set in `spec.version`
+
+NOTE: Please note that the version you need to specify for `spec.version` is not only the version of Hadoop which you want to roll out, but has to be amended with a Stackable version as shown. This Stackable version is the version of the underlying container image which is used to execute the processes. For a list of available versions please check our
+https://repo.stackable.tech/#browse/browse:docker:v2%2Fstackable%2Fhadoop%2Ftags[image registry].
+It should generally be safe to simply use the latest image version that is available.
+
+Create the actual HDFS cluster by applying the file:
+
+----
+include::example$code/getting_started.sh[tag=install-hdfs]
+----
+
+Track the progress with `kubectl` as this step may take a few minutes:
+
+[source]
+include::example$code/getting_started.sh[tag=watch-hdfs-rollout]
+
+
+== Verify that it works
+
+To test the cluster you can create a new file, check its status and then delete it. We will execute these actions from within a helper pod. Create file called `webhdfs.yaml`:
+
+[source,yaml]
+----
+include::example$code/webhdfs.yaml[]
+----
+
+Apply it watch its progress:
+
+[source]
+include::example$code/getting_started.sh[tag=install-webhdfs]
+include::example$code/getting_started.sh[tag=watch-helper-rollout]
+
+To begin with the cluster should be empty: this can be verified by listing all resources at the root directory (which should return an empty array!):
+
+[source]
+include::example$code/getting_started.sh[tag=file-status]
+
+Creating a file in HDFS using the https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/WebHDFS.html#Create_and_Write_to_a_File[Webhdfs API] requires a two-step `PUT` (the reason of having two-step create/append is for preventing clients to send out data before the redirect).
First, create a file with some text in it called `testdata.txt` and copy it to the `tmp` directory on the helper pod: + +[source] +include::example$code/getting_started.sh[tag=copy-file] + +Then use `curl` to issue a `PUT` command: + +[source] +include::example$code/getting_started.sh[tag=create-file] + +This will return a location that will look something like this: + +[source] +http://simple-hdfs-datanode-default-0.simple-hdfs-datanode-default.default.svc.cluster.local:9864/webhdfs/v1/testdata.txt?op=CREATE&user.name=stackable&namenoderpcaddress=simple-hdfs&createflag=&createparent=true&overwrite=false + +Assuming this value is assigned to a local variable (alternatively you can copy-and-paste it into the URL) called `$location`, issue a second `PUT` like this: + +[source] +include::example$code/getting_started.sh[tag=create-file-redirected] + +Rechecking the status again with: + +[source] +include::example$code/getting_started.sh[tag=file-status] + +will now display some metadata about the file that was created in the HDFS cluster: + +[source,json] +{ + "FileStatuses": { + "FileStatus": [ + { + "accessTime": 1660821734999, + "blockSize": 134217728, + "childrenNum": 0, + "fileId": 16396, + "group": "supergroup", + "length": 597, + "modificationTime": 1660821735602, + "owner": "stackable", + "pathSuffix": "testdata.txt", + "permission": "644", + "replication": 3, + "storagePolicy": 0, + "type": "FILE" + } + ] + } +} + +To clean up, the file can be deleted like this: + +[source] +include::example$code/getting_started.sh[tag=delete-file] + +== What's next + +Look at the xref:ROOT:usage.adoc[Usage page] to find out more about configuring your HDFS cluster. \ No newline at end of file diff --git a/docs/modules/getting_started/pages/index.adoc b/docs/modules/getting_started/pages/index.adoc new file mode 100644 index 00000000..0b4614b7 --- /dev/null +++ b/docs/modules/getting_started/pages/index.adoc @@ -0,0 +1,18 @@ += Getting started + +This guide will get you started with HDFS using the Stackable Operator. It will guide you through the installation of the Operator and its dependencies, setting up your first HDFS cluster and ... + +== Prerequisites + +You will need: + +* a Kubernetes cluster +* kubectl +* Helm + +== What's next + +The Guide is divided into two steps: + +* xref:installation.adoc[Installing the Operators]. +* xref:first_steps.adoc[Setting up the HDFS cluster and verifying it works]. \ No newline at end of file diff --git a/docs/modules/getting_started/pages/installation.adoc b/docs/modules/getting_started/pages/installation.adoc new file mode 100644 index 00000000..9e81e055 --- /dev/null +++ b/docs/modules/getting_started/pages/installation.adoc @@ -0,0 +1,54 @@ += Installation + +On this page you will install the Stackable HDFS operator and its dependency, the Zookeeper operator, as well as the commons and secret operators which are required by all Stackable operators. + +== Stackable Operators + +There are 2 ways to run Stackable Operators + +1. Using xref:stackablectl::index.adoc[] + +1. Using Helm + +=== stackablectl + +stackablectl is the command line tool to interact with Stackable operators and our recommended way to install operators. +Follow the xref:stackablectl::installation.adoc[installation steps] for your platform. 
+ +After you have installed stackablectl run the following command to install all operators necessary for Zookeeper: + +[source,bash] +---- +include::example$code/getting_started.sh[tag=stackablectl-install-operators] +---- + +The tool will show + +---- +[INFO ] Installing commons operator +[INFO ] Installing secret operator +[INFO ] Installing zookeeper operator +[INFO ] Installing hdfs operator +---- + +TIP: Consult the xref:stackablectl::quickstart.adoc[] to learn more about how to use stackablectl. For example, you can use the `-k` flag to create a Kubernetes cluster with link:https://kind.sigs.k8s.io/[kind]. + +=== Helm + +You can also use Helm to install the operators. Add the Stackable Helm repository: +[source,bash] +---- +include::example$code/getting_started.sh[tag=helm-add-repo] +---- + +Then install the Stackable Operators: +[source,bash] +---- +include::example$code/getting_started.sh[tag=helm-install-operators] +---- + +Helm will deploy the operators in a Kubernetes Deployment and apply the CRDs for the HDFS cluster (as well as the CRDs for the required operators). You are now ready to deploy HDFS in Kubernetes. + +== What's next + +xref:first_steps.adoc[Set up an HDFS cluster] and its dependencies and xref:first_steps.adoc#_verify_that_it_works[verify that it works]. \ No newline at end of file diff --git a/docs/templating_vars.yaml b/docs/templating_vars.yaml new file mode 100644 index 00000000..61937d1b --- /dev/null +++ b/docs/templating_vars.yaml @@ -0,0 +1,9 @@ +--- +helm: + repo_name: stackable-dev + repo_url: https://repo.stackable.tech/repository/helm-dev/ +versions: + commons: 0.3.0-nightly + secret: 0.6.0-nightly + zookeeper: 0.11.0-nightly + hdfs: 0.5.0-nightly \ No newline at end of file diff --git a/scripts/docs_templating.sh b/scripts/docs_templating.sh new file mode 100755 index 00000000..ed5b9b01 --- /dev/null +++ b/scripts/docs_templating.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Reads a file with variables to insert into templates, and templates all .*.j2 files +# in the 'docs' directory. +# +# dependencies +# pip install jinja2-cli + +docs_dir="$(dirname "$0")/../docs" +templating_vars_file="$docs_dir/templating_vars.yaml" + +# Check if files need templating +if [[ -z $(find "$docs_dir" -name '*.j2') ]]; +then + echo "No files need templating, exiting." + exit +fi + +# Check if jinja2 is there +if ! command -v jinja2 &> /dev/null +then + echo "jinja2 could not be found. Use 'pip install jinja2-cli' to install it." + exit +fi + +# Check if templating vars file exists +if [[ ! -f "$templating_vars_file" ]]; +then + echo "$templating_vars_file does not exist, cannot start templating." 
+  exit 1
+fi + +find "$docs_dir" -name '*.j2' | +while read -r file +do + new_file_name=${file%.j2} # Remove .j2 suffix + echo "templating $new_file_name" + jinja2 "$file" "$templating_vars_file" -o "$new_file_name" +done + +echo "done" From cadd8ea21cec682c506fe80f43072580f3be531b Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Thu, 18 Aug 2022 14:16:41 +0200 Subject: [PATCH 02/13] linter --- docs/templating_vars.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/templating_vars.yaml b/docs/templating_vars.yaml index 61937d1b..f1c8d3f1 100644 --- a/docs/templating_vars.yaml +++ b/docs/templating_vars.yaml @@ -6,4 +6,4 @@ versions: commons: 0.3.0-nightly secret: 0.6.0-nightly zookeeper: 0.11.0-nightly - hdfs: 0.5.0-nightly \ No newline at end of file + hdfs: 0.5.0-nightly From b058868cc903372e467734f1a73bae2715519b36 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Thu, 18 Aug 2022 14:26:07 +0200 Subject: [PATCH 03/13] minor cleanup --- .../getting_started/examples/code/getting_started.sh | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/docs/modules/getting_started/examples/code/getting_started.sh b/docs/modules/getting_started/examples/code/getting_started.sh index 652ee62c..3c51fac0 100755 --- a/docs/modules/getting_started/examples/code/getting_started.sh +++ b/docs/modules/getting_started/examples/code/getting_started.sh @@ -119,13 +119,9 @@ location=$(create_file | jq -r '.Location') echo "Redirect location: $location" -create_redirected() { - # tag::create-file-redirected[] - kubectl exec -n default webhdfs-0 -- curl -s -XPUT -T /tmp/testdata.txt "$location" - # end::create-file-redirected[] -} - -redirected=$(create_redirected) +# tag::create-file-redirected[] +kubectl exec -n default webhdfs-0 -- curl -s -XPUT -T /tmp/testdata.txt "$location" +# end::create-file-redirected[] echo "Confirm that HDFS is *not* empty..." found_file=$(file_status | jq -r '.FileStatuses.FileStatus[0].pathSuffix') From 2e7665a574c5163fec40425309d08fdcd916e1bb Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Thu, 18 Aug 2022 14:27:44 +0200 Subject: [PATCH 04/13] templating correction --- .../examples/code/getting_started.sh.j2 | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/docs/modules/getting_started/examples/code/getting_started.sh.j2 b/docs/modules/getting_started/examples/code/getting_started.sh.j2 index 798b78f1..2510a80b 100755 --- a/docs/modules/getting_started/examples/code/getting_started.sh.j2 +++ b/docs/modules/getting_started/examples/code/getting_started.sh.j2 @@ -119,13 +119,9 @@ location=$(create_file | jq -r '.Location') echo "Redirect location: $location" -create_redirected() { - # tag::create-file-redirected[] - kubectl exec -n default webhdfs-0 -- curl -s -XPUT -T /tmp/testdata.txt "$location" - # end::create-file-redirected[] -} - -redirected=$(create_redirected) +# tag::create-file-redirected[] +kubectl exec -n default webhdfs-0 -- curl -s -XPUT -T /tmp/testdata.txt "$location" +# end::create-file-redirected[] echo "Confirm that HDFS is *not* empty..." 
found_file=$(file_status | jq -r '.FileStatuses.FileStatus[0].pathSuffix') From 73980e0140dd6c02d6c4bcacf2dd9ef2a2bc9597 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Thu, 18 Aug 2022 15:28:59 +0200 Subject: [PATCH 05/13] corrections --- docs/modules/getting_started/pages/first_steps.adoc | 8 ++++---- docs/modules/getting_started/pages/installation.adoc | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/modules/getting_started/pages/first_steps.adoc b/docs/modules/getting_started/pages/first_steps.adoc index 2f4a2ec2..80c12a8a 100644 --- a/docs/modules/getting_started/pages/first_steps.adoc +++ b/docs/modules/getting_started/pages/first_steps.adoc @@ -58,14 +58,14 @@ include::example$code/getting_started.sh[tag=watch-hdfs-rollout] == Verify that it works -To test the cluster you can create a new file, check its status and then delete it. We will execute these actions from within a helper pod. Create file called `webhdfs.yaml`: +To test the cluster you can create a new file, check its status and then delete it. We will execute these actions from within a helper pod. Create a file called `webhdfs.yaml`: [source,yaml] ---- include::example$code/webhdfs.yaml[] ---- -Apply it watch its progress: +Apply it and monitor its progress: [source] include::example$code/getting_started.sh[tag=install-webhdfs] @@ -76,7 +76,7 @@ To begin with the cluster should be empty: this can be verified by listing all [source] include::example$code/getting_started.sh[tag=file-status] -Creating a file in HDFS using the https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/WebHDFS.html#Create_and_Write_to_a_File[Webhdfs API] requires a two-step `PUT` (the reason of having two-step create/append is for preventing clients to send out data before the redirect). First, create a file with some text in it called `testdata.txt` and copy it to the `tmp` directory on the helper pod: +Creating a file in HDFS using the https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/WebHDFS.html#Create_and_Write_to_a_File[Webhdfs API] requires a two-step `PUT` (the reason for having a two-step create/append is to prevent clients from sending out data before the redirect). First, create a file with some text in it called `testdata.txt` and copy it to the `tmp` directory on the helper pod: [source] include::example$code/getting_started.sh[tag=copy-file] @@ -91,7 +91,7 @@ This will return a location that will look something like this: [source] http://simple-hdfs-datanode-default-0.simple-hdfs-datanode-default.default.svc.cluster.local:9864/webhdfs/v1/testdata.txt?op=CREATE&user.name=stackable&namenoderpcaddress=simple-hdfs&createflag=&createparent=true&overwrite=false -Assuming this value is assigned to a local variable (alternatively you can copy-and-paste it into the URL) called `$location`, issue a second `PUT` like this: +You can assign this to a local variable - e.g. 
`$location` - or you can copy-and-paste it into the URL, and then issue a second `PUT` like this: [source] include::example$code/getting_started.sh[tag=create-file-redirected] diff --git a/docs/modules/getting_started/pages/installation.adoc b/docs/modules/getting_started/pages/installation.adoc index 9e81e055..e7cf7784 100644 --- a/docs/modules/getting_started/pages/installation.adoc +++ b/docs/modules/getting_started/pages/installation.adoc @@ -15,7 +15,7 @@ There are 2 ways to run Stackable Operators stackablectl is the command line tool to interact with Stackable operators and our recommended way to install operators. Follow the xref:stackablectl::installation.adoc[installation steps] for your platform. -After you have installed stackablectl run the following command to install all operators necessary for Zookeeper: +After you have installed stackablectl run the following command to install all operators necessary for the HDFS cluster: [source,bash] ---- From 0573a26013b0be344f7427c20bacfe5f4f2dea5d Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Thu, 18 Aug 2022 16:38:09 +0200 Subject: [PATCH 06/13] Update docs/modules/getting_started/pages/index.adoc Co-authored-by: Felix Hennig --- docs/modules/getting_started/pages/index.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/modules/getting_started/pages/index.adoc b/docs/modules/getting_started/pages/index.adoc index 0b4614b7..f7c5cc7d 100644 --- a/docs/modules/getting_started/pages/index.adoc +++ b/docs/modules/getting_started/pages/index.adoc @@ -8,7 +8,7 @@ You will need: * a Kubernetes cluster * kubectl -* Helm +* optional: Helm == What's next From 973d88a538bfd622e91c2c623512a3ab42223c69 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Thu, 18 Aug 2022 16:38:23 +0200 Subject: [PATCH 07/13] Update docs/modules/getting_started/pages/first_steps.adoc Co-authored-by: Felix Hennig --- docs/modules/getting_started/pages/first_steps.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/modules/getting_started/pages/first_steps.adoc b/docs/modules/getting_started/pages/first_steps.adoc index 80c12a8a..32661b61 100644 --- a/docs/modules/getting_started/pages/first_steps.adoc +++ b/docs/modules/getting_started/pages/first_steps.adoc @@ -11,7 +11,7 @@ To deploy a Zookeeper cluster create one file called `zk.yaml`: [source,yaml] include::example$code/zk.yaml[] -We also need to define a `ZNode` that will be used by the HDFS cluster to reference Zookeeper. Create another file called `znode.yaml`: +We also need to define a ZNode that will be used by the HDFS cluster to reference Zookeeper. Create another file called `znode.yaml`: [source,yaml] include::example$code/znode.yaml[] From e00e1eff3cc7ca3d93a99f62c610c0bc6673c6c0 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Thu, 18 Aug 2022 16:38:33 +0200 Subject: [PATCH 08/13] Update docs/modules/getting_started/pages/first_steps.adoc Co-authored-by: Felix Hennig --- docs/modules/getting_started/pages/first_steps.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/modules/getting_started/pages/first_steps.adoc b/docs/modules/getting_started/pages/first_steps.adoc index 32661b61..318d8446 100644 --- a/docs/modules/getting_started/pages/first_steps.adoc +++ b/docs/modules/getting_started/pages/first_steps.adoc @@ -28,7 +28,7 @@ include::example$code/getting_started.sh[tag=watch-zk-rollout] === HDFS -An HDFS cluster has three components: then `namenode`, the `datanode` and the `journalnode`. 
Create a file named `hdfs.yaml` defining 2 `namenodes` and one `datanode` and `journalnode` each: +An HDFS cluster has three components: the `namenode`, the `datanode` and the `journalnode`. Create a file named `hdfs.yaml` defining 2 `namenodes` and one `datanode` and `journalnode` each: [source,yaml] ---- From 28a6a1230cf5cf5dab07b54af30093bd59104d00 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Thu, 18 Aug 2022 16:38:55 +0200 Subject: [PATCH 09/13] Update docs/modules/getting_started/examples/code/zk.yaml Co-authored-by: Felix Hennig --- docs/modules/getting_started/examples/code/zk.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/docs/modules/getting_started/examples/code/zk.yaml b/docs/modules/getting_started/examples/code/zk.yaml index 78eb9b8c..b75276c4 100644 --- a/docs/modules/getting_started/examples/code/zk.yaml +++ b/docs/modules/getting_started/examples/code/zk.yaml @@ -8,8 +8,5 @@ spec: servers: roleGroups: default: - selector: - matchLabels: - kubernetes.io/os: linux replicas: 1 config: {} From 681b8851bf53061da8475c40d95a66b3d588ecc7 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Thu, 18 Aug 2022 16:39:00 +0200 Subject: [PATCH 10/13] Update docs/modules/getting_started/examples/code/zk.yaml Co-authored-by: Felix Hennig --- docs/modules/getting_started/examples/code/zk.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/modules/getting_started/examples/code/zk.yaml b/docs/modules/getting_started/examples/code/zk.yaml index b75276c4..eee85941 100644 --- a/docs/modules/getting_started/examples/code/zk.yaml +++ b/docs/modules/getting_started/examples/code/zk.yaml @@ -9,4 +9,3 @@ spec: roleGroups: default: replicas: 1 - config: {} From 0e8b5d34160569617cabecbbf127f219d5d1adcb Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Thu, 18 Aug 2022 16:39:06 +0200 Subject: [PATCH 11/13] Update docs/modules/getting_started/examples/code/hdfs.yaml Co-authored-by: Felix Hennig --- docs/modules/getting_started/examples/code/hdfs.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/docs/modules/getting_started/examples/code/hdfs.yaml b/docs/modules/getting_started/examples/code/hdfs.yaml index 4d7ec233..92ee6074 100644 --- a/docs/modules/getting_started/examples/code/hdfs.yaml +++ b/docs/modules/getting_started/examples/code/hdfs.yaml @@ -55,7 +55,4 @@ spec: limit: '1Gi' roleGroups: default: - selector: - matchLabels: - kubernetes.io/os: linux replicas: 1 From 0da47905767bbc220f0063a792f521e127fdac12 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Thu, 18 Aug 2022 16:39:20 +0200 Subject: [PATCH 12/13] Update docs/modules/getting_started/examples/code/hdfs.yaml Co-authored-by: Felix Hennig --- docs/modules/getting_started/examples/code/hdfs.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/docs/modules/getting_started/examples/code/hdfs.yaml b/docs/modules/getting_started/examples/code/hdfs.yaml index 92ee6074..9d757c05 100644 --- a/docs/modules/getting_started/examples/code/hdfs.yaml +++ b/docs/modules/getting_started/examples/code/hdfs.yaml @@ -41,9 +41,6 @@ spec: capacity: 1Gi roleGroups: default: - selector: - matchLabels: - kubernetes.io/os: linux replicas: 1 journalNodes: config: From 56cb52e7bae08fb2807a11d3eb9bbf2144862647 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Thu, 18 Aug 2022 16:58:44 +0200 Subject: [PATCH 13/13] incorporated review comments --- .../getting_started/examples/code/hdfs.yaml | 20 ------------------- docs/modules/getting_started/pages/index.adoc | 2 +- 2 files changed, 1 insertion(+), 21 deletions(-) diff 
--git a/docs/modules/getting_started/examples/code/hdfs.yaml b/docs/modules/getting_started/examples/code/hdfs.yaml index 9d757c05..e7df7359 100644 --- a/docs/modules/getting_started/examples/code/hdfs.yaml +++ b/docs/modules/getting_started/examples/code/hdfs.yaml @@ -22,34 +22,14 @@ spec: log4j.appender.console.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n log4j.appender.EventCounter=org.apache.hadoop.log.metrics.EventCounter nameNodes: - config: - resources: - storage: - data: - capacity: 256Mi roleGroups: default: - selector: - matchLabels: - kubernetes.io/os: linux replicas: 2 dataNodes: - config: - resources: - storage: - data: - capacity: 1Gi roleGroups: default: replicas: 1 journalNodes: - config: - resources: - storage: - data: - capacity: 512Mi - memory: - limit: '1Gi' roleGroups: default: replicas: 1 diff --git a/docs/modules/getting_started/pages/index.adoc b/docs/modules/getting_started/pages/index.adoc index f7c5cc7d..f0f9ad14 100644 --- a/docs/modules/getting_started/pages/index.adoc +++ b/docs/modules/getting_started/pages/index.adoc @@ -1,6 +1,6 @@ = Getting started -This guide will get you started with HDFS using the Stackable Operator. It will guide you through the installation of the Operator and its dependencies, setting up your first HDFS cluster and ... +This guide will get you started with HDFS using the Stackable Operator. It will guide you through the installation of the Operator and its dependencies, setting up your first HDFS cluster and verifying its operation. == Prerequisites