stackabletech · fhennig · Jan 31, 2023 · Jan 31, 2023 · Jan 31, 2023
diff --git a/modules/ROOT/examples/code/migrate-hdfs-23_1.sh b/modules/ROOT/examples/code/migrate-hdfs-23_1.sh
@@ -0,0 +1,82 @@
+#!/usr/bin/env bash
+
+if [ $# -ne 1 ] ; then
+     echo "Usage: $0 CLUSTER_NAME"
+     exit 1
+else
+    HDFS_CLUSTER_NAME=$1
+fi
+
+kubectl get pvc -l app.kubernetes.io/name=hdfs -l app.kubernetes.io/instance="$HDFS_CLUSTER_NAME"
+# JournalNodes: for every journalnode PVC of the given cluster, create a one-off Job
+# that mounts the PVC at /stackable/data and renames the directory "journal" to
+# "journalnode". An already existing "journalnode" directory is removed first.
+# All label requirements go into ONE comma-separated selector: when -l is given
+# several times, kubectl only honours the last occurrence, which would select the
+# journalnode PVCs of every HDFS cluster in the namespace.
+for pvc in $(kubectl get pvc -l "app.kubernetes.io/name=hdfs,app.kubernetes.io/instance=$HDFS_CLUSTER_NAME,app.kubernetes.io/component=journalnode" -o name | sed -e 's#persistentvolumeclaim/##'); do
+kubectl apply -f - << EOF
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: migrate-journalnode-${pvc}
+spec:
+  template:
+    spec:
+      containers:
+        - name: migrate
+          image: docker.stackable.tech/stackable/hadoop:3.3.4-stackable23.1.0
+          command: ["bash", "-c", "ls -la /stackable/data && if [ -d /stackable/data/journal ]; then echo Removing might existing target dir && rm -rf /stackable/data/journalnode && echo Renaming folder && mv /stackable/data/journal /stackable/data/journalnode; else echo Nothing to do; fi"]
+          volumeMounts:
+            - name: data
+              mountPath: /stackable/data
+      volumes:
+        - name: data
+          persistentVolumeClaim:
+            claimName: ${pvc}
+      restartPolicy: Never
+  backoffLimit: 1
+EOF
+done
+# NameNodes: for every namenode PVC of the given cluster, create a one-off Job
+# that mounts the PVC at /stackable/data and renames the directory "name" to
+# "namenode". An already existing "namenode" directory is removed first.
+# All label requirements go into ONE comma-separated selector: when -l is given
+# several times, kubectl only honours the last occurrence, which would select the
+# namenode PVCs of every HDFS cluster in the namespace.
+for pvc in $(kubectl get pvc -l "app.kubernetes.io/name=hdfs,app.kubernetes.io/instance=$HDFS_CLUSTER_NAME,app.kubernetes.io/component=namenode" -o name | sed -e 's#persistentvolumeclaim/##'); do
+kubectl apply -f - << EOF
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: migrate-namenode-${pvc}
+spec:
+  template:
+    spec:
+      containers:
+        - name: migrate
+          image: docker.stackable.tech/stackable/hadoop:3.3.4-stackable23.1.0
+          command: ["bash", "-c", "ls -la /stackable/data && if [ -d /stackable/data/name ]; then echo Removing might existing target dir && rm -rf /stackable/data/namenode && echo Renaming folder && mv /stackable/data/name /stackable/data/namenode; else echo Nothing to do; fi"]
+          volumeMounts:
+            - name: data
+              mountPath: /stackable/data
+      volumes:
+        - name: data
+          persistentVolumeClaim:
+            claimName: ${pvc}
+      restartPolicy: Never
+  backoffLimit: 1
+EOF
+done
+# DataNodes: for every datanode PVC of the given cluster, create a one-off Job
+# that mounts the PVC at /stackable/data/data and renames the directory "data"
+# inside it to "datanode". An already existing "datanode" directory is removed first.
+# All label requirements go into ONE comma-separated selector: when -l is given
+# several times, kubectl only honours the last occurrence, which would select the
+# datanode PVCs of every HDFS cluster in the namespace.
+for pvc in $(kubectl get pvc -l "app.kubernetes.io/name=hdfs,app.kubernetes.io/instance=$HDFS_CLUSTER_NAME,app.kubernetes.io/component=datanode" -o name | sed -e 's#persistentvolumeclaim/##'); do
+kubectl apply -f - << EOF
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: migrate-datanode-${pvc}
+spec:
+  template:
+    spec:
+      containers:
+        - name: migrate
+          image: docker.stackable.tech/stackable/hadoop:3.3.4-stackable23.1.0
+          command: ["bash", "-c", "ls -la /stackable/data/data && if [ -d /stackable/data/data/data ]; then echo Removing might existing target dir && rm -rf /stackable/data/data/datanode && echo Renaming folder && mv /stackable/data/data/data /stackable/data/data/datanode; else echo Nothing to do; fi"]
+          volumeMounts:
+            - name: data
+              mountPath: /stackable/data/data
+      volumes:
+        - name: data
+          persistentVolumeClaim:
+            claimName: ${pvc}
+      restartPolicy: Never
+  backoffLimit: 1
+EOF
+done
diff --git a/modules/ROOT/pages/release_notes.adoc b/modules/ROOT/pages/release_notes.adoc
@@ -164,54 +164,43 @@ to
 ```
 
 ==== Stackable Operator for Apache Hadoop
-* https://github.com/stackabletech/hdfs-operator/issues/274[Support for multiple storage directories]
+* https://github.com/stackabletech/hdfs-operator/issues/290[Enable Log Aggregation for HDFS]
 
-As part of the change mentioned above the naming scheme for the PersistentVolumeClaims written for DataNodes has been changed, so that PVCs written by earlier operator versions are not recognized any more.
-Previous PVCs were called `hdfs-datanode-default-0` but now need to have the prefix `data-` added, because otherwise there may be naming collisions due to the ability to specify multiple storage classes.
+As part of the change mentioned above we also did some code cleanup that allowed us to remove arbitrary hard-coded values from the operator.
 
-In order to move over existing PVCs a few migration steps are required.
-Since it is not possible to rename PVCs you'll need to delete the existing PVCs and recreate them with the correct name and bound to the correct backing PV.
+This change affects the directory structure the operator creates inside of the PersistentVolumes used for permanent storage.
 
-Please find an example workflow of how this can be achieved below, this also sets the reclaim policy for the backing PV to `Retain` to avoid the PV being deleted when it becomes unbound, depending on your Kubernetes config this step may not be necessary.
+The old folder naming was:
 
-[source,bash]
-----
-export PVNAME=$(kubectl get pvc hdfs-datanode-default-0 -o yaml | yq '.spec.volumeName')
+ - DataNode -> `data`
+ - JournalNode -> `journal`
+ - NameNode -> `name`
 
-kubectl patch pv ${PVNAME} -p '{"spec":{"persistentVolumeReclaimPolicy": "Retain"}}'
+which has now been adapted to match the actual role name:
 
-kubectl delete pvc hdfs-datanode-default-0
+- DataNode -> `datanode`
+- JournalNode -> `journalnode`
+- NameNode -> `namenode`
 
-kubectl patch pv ${PVNAME} -p '{"spec":{"claimRef": null}}'
-----
 
-Afterwards you can recreate the PVC with the new name and bind it to the PV.
+Unfortunately, this means that for clusters that were initially rolled out with an older operator version, a one-time migration step becomes necessary to rename these directories.
 
-Please note that you will need to adapt labels, storageClassName and resources to your specific configuration.
-Ideally export the pre-existing PVC with kubectl and change the name.
+You can either do this manually by attaching the PVs to a pod and performing the rename (the cluster needs to be stopped for this) or use the script provided below.
 
-[source,yaml]
+WARNING: Please be aware that if this script runs after the cluster was already restarted with the newer operator version, it will delete any data that was written to the empty post-upgrade HDFS that was stood up by the new operator.
+
+[source,bash]
 ----
-apiVersion: v1
-kind: PersistentVolumeClaim
-metadata:
-  labels:
-    app.kubernetes.io/component: datanode
-    app.kubernetes.io/instance: hdfs
-    app.kubernetes.io/name: hdfs
-    app.kubernetes.io/role-group: default
-  name: data-hdfs-datanode-default-0
-spec:
-  accessModes:
-    - ReadWriteOnce
-  resources:
-    requests:
-      storage: 1Gi
-  storageClassName: standard
-  volumeMode: Filesystem
-  volumeName: <insert PV Name here>
+include::example$code/migrate-hdfs-23_1.sh[]
 ----
 
+The migration process for this now becomes:
+
+* Stop the HDFS cluster by either removing the HdfsCluster definition object or scaling all roles to 0 replicas
+* Uninstall Stackable Operator for Apache Hadoop
+* Run migration script
+* Install newer version of Stackable Operator for Apache Hadoop
+
 ==== Stackable Operator for Apache Hive
 * https://github.com/stackabletech/hive-operator/pull/292[Moved database specification from role/role-group level to top-level clusterConfig]
 * https://github.com/stackabletech/hive-operator/pull/292[Moved s3, serviceType and hdfs discovery to top-level clusterConfig]

diff --git a/modules/contributor/pages/adr/drafts/ADRx-maintenance_windows.adoc b/modules/contributor/pages/adr/drafts/ADRx-maintenance_windows.adoc
@@ -0,0 +1,84 @@
+= Definition of Maintenance Windows for the Managed Applications
+Sönke Liebau <[email protected]>
+v0.1, 2023-01-30
+:status: draft
+
+* Status: {status}
+* Deciders: [list everyone involved in the decision] <!-- optional -->
+* Date: [YYYY-MM-DD when the decision was last updated] <!-- optional -->
+
+Technical Story: [description | ticket/issue URL] <!-- optional -->
+
+== Context and Problem Statement
+
+When operating production environments with the tools of the SDP, it can often be desirable to avoid restarts or changes unless they have been planned and communicated.
+
+There are a couple of things that could trigger a change that in turn triggers a restart of the applications in the SDP:
+
+- User changes to the definitions (CRDs)
+- Operator upgrade that causes different objects to be written for the same definition
+- Changes to dependent objects
+- ...
+
+In order to enable users to better control when these changes are allowed to affect deployed applications we should add the ability to define maintenance windows.
+
+== Decision Drivers <!-- optional -->
+
+* [driver 1, e.g., a force, facing concern, …]
+* [driver 2, e.g., a force, facing concern, …]
+* … <!-- numbers of drivers can vary -->
+
+== Considered Options
+
+* Implement a top level struct that can in
+* [option 2]
+* [option 3]
+* … <!-- numbers of options can vary -->
+
+== Decision Outcome
+
+Chosen option: "[option 1]", because [justification. e.g., only option, which meets k.o. criterion decision driver | which resolves force force | … | comes out best (see below)].
+
+=== Positive Consequences <!-- optional -->
+
+* [e.g., improvement of quality attribute satisfaction, follow-up decisions required, …]
+* …
+
+=== Negative Consequences <!-- optional -->
+
+* [e.g., compromising quality attribute, follow-up decisions required, …]
+* …
+
+== Pros and Cons of the Options <!-- optional -->
+
+=== [option 1]
+
+[example | description | pointer to more information | …] <!-- optional -->
+
+* Good, because [argument a]
+* Good, because [argument b]
+* Bad, because [argument c]
+* … <!-- numbers of pros and cons can vary -->
+
+=== [option 2]
+
+[example | description | pointer to more information | …] <!-- optional -->
+
+* Good, because [argument a]
+* Good, because [argument b]
+* Bad, because [argument c]
+* … <!-- numbers of pros and cons can vary -->
+
+=== [option 3]
+
+[example | description | pointer to more information | …] <!-- optional -->
+
+* Good, because [argument a]
+* Good, because [argument b]
+* Bad, because [argument c]
+* … <!-- numbers of pros and cons can vary -->
+
+== Links <!-- optional -->
+
+* [Link type] [Link to ADR] <!-- example: Refined by [ADR-0005](0005-example.md) -->
+* … <!-- numbers of links can vary -->