summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordoufenghu <[email protected]>2024-06-20 21:54:10 +0800
committerdoufenghu <[email protected]>2024-06-20 21:54:10 +0800
commit445f9f129ad07e04a3e087a93793d6ce5a47f341 (patch)
tree7e8fc7e67782a90f84eaa4f3abca070ddb160561
parent687f56833e01f1ba6b2e66ab5d7c53847fbfd6c4 (diff)
[Fix][docs] Fix some bootstrap errors
-rw-r--r--README.md10
-rw-r--r--config/grootstream_env.sh6
-rw-r--r--docs/faq.md18
-rw-r--r--groot-bootstrap/src/main/bin/stop.sh18
4 files changed, 37 insertions, 15 deletions
diff --git a/README.md b/README.md
index 58a064e..ab6d074 100644
--- a/README.md
+++ b/README.md
@@ -75,7 +75,7 @@ FLINK_JOB_MANAGER_ADDRESS=${FLINK_JOB_MANAGER_ADDRESS:-localhost:8081}
YARN_ADDRESS=${YARN_ADDRESS:-yarn-cluster}
```
#### 3. Configure the groot-stream job
-You need to configure the groot-stream job in `config/template/grootstream_job_template.yaml` file. More information about config please check [config concept](docs/user-guide.md)
+You need to configure the groot-stream job in the `config/grootstream_job_example.yaml` file. For more information about the configuration, see [config concept](docs/user-guide.md).
#### 4. Submit a job to flink engine
Can be started by a daemon with `-d`.
@@ -89,7 +89,7 @@ Can be started by a daemon with `-d`.
2. Open the `Run/Debug Configurations` window.
3. Choose -cp `groot-bootstrap`
4. Choose Main Class `com.geedgenetworks.bootstrap.main.GrootStreamServer`.
-5. Add VM options `--target local -c /...../groot-stream/config/inline_to_print_template.yaml`.
+5. Add VM options `--target local -c /...../groot-stream/config/grootstream_job_example.yaml`.
6. Click the `Run` button.
#### Running the CLI
@@ -100,13 +100,16 @@ cd "groot-stream-${version}"
```
- Run the following command to start the groot-stream server for Yarn Session Mode:
```shell
+# First create a yarn session cluster
+yarn-session.sh -d
+# Then start the groot-stream server for Yarn Session Mode.
cd "groot-stream-${version}"
./bin/start.sh -c ./config/grootstream_job_example.yaml --target yarn-session -Dyarn.application.id=application_XXXX_YY -n inline-to-print-job -d
```
- Run the following command to start the groot-stream server for Yarn Per-job Mode:
```shell
cd "groot-stream-${version}"
-./bin/start.sh -c ./config/grootstream_job_example.yaml --target yarn-per-job -Dyarn.application.name="inline-to-print-job" -n inline-to-print-job -d
+./bin/start.sh -c ./config/grootstream_job_example.yaml --target yarn-per-job -Dyarn.application.name="inline-to-print-job" -Djobmanager.memory.process.size=1024m -Dtaskmanager.memory.process.size=2048m -Dtaskmanager.numberOfTaskSlots=3 -p 6 -n inline-to-print-job -d
```
### Configuring
@@ -118,4 +121,3 @@ See the [Groot Stream Documentation](docs) for more information.
## Contributors
All developers see the list of contributors [here](https://git.mesalab.cn/galaxy/platform/groot-stream/-/graphs/develop?ref_type=heads).
-
diff --git a/config/grootstream_env.sh b/config/grootstream_env.sh
index 69fa8d3..3acfe55 100644
--- a/config/grootstream_env.sh
+++ b/config/grootstream_env.sh
@@ -1,5 +1,7 @@
#!/usr/bin/env bash
# Home directory of flink distribution.
FLINK_HOME=${FLINK_HOME:-/opt/flink}
-FLINK_JOB_MANAGER_ADDRESS=${FLINK_JOB_MANAGER_ADDRESS:-localhost:8081}
-YARN_ADDRESS=${YARN_ADDRESS:-yarn-cluster} \ No newline at end of file
+# Flink job manager address deployed in standalone mode.
+STANDALONE_JOB_MANAGER_ADDRESS=${STANDALONE_JOB_MANAGER_ADDRESS:-localhost:8081}
+# YARN session cluster ID.
+YARN_SESSION_CLUSTER_ID=${YARN_SESSION_CLUSTER_ID:-application_1630480000000_0001} \ No newline at end of file
diff --git a/docs/faq.md b/docs/faq.md
index e69de29..2af4e2b 100644
--- a/docs/faq.md
+++ b/docs/faq.md
@@ -0,0 +1,18 @@
+## Maven 3.8.1 blocked mirror for internal repositories
+If you build the project with the Maven Wrapper (mvnw), you may encounter the following error:
+```
+maven-default-http-blocker (http://0.0.0.0/): Blocked mirror for repositories: xxx
+```
+This is because Maven 3.8.1 and later ship a default mirror (`maven-default-http-blocker`) that blocks external repositories served over plain HTTP (non-HTTPS). You can disable this behavior by adding the following configuration to the Maven `settings.xml` file:
+```xml
+<mirrors>
+ <mirror>
+ <id>maven-default-http-blocker</id>
+ <mirrorOf>external:dont-match-anything-mate:*</mirrorOf>
+ <name>Pseudo repository to mirror external repositories initially using HTTP.</name>
+ <url>http://0.0.0.0/</url>
+ <blocked>false</blocked>
+ </mirror>
+</mirrors>
+```
+
diff --git a/groot-bootstrap/src/main/bin/stop.sh b/groot-bootstrap/src/main/bin/stop.sh
index 91e24b9..c68f27c 100644
--- a/groot-bootstrap/src/main/bin/stop.sh
+++ b/groot-bootstrap/src/main/bin/stop.sh
@@ -2,31 +2,31 @@
# Function to display usage
display_usage() {
- echo "Usage: $0 <DEPLOYMENT_MODE> <JOB_NAME>"
+ echo "Usage: $0 <DEPLOYMENT_MODE> <JOB_OR_YARN_APP_NAME>"
echo "DEPLOYMENT_MODE: standalone, yarn-per-job, or yarn-session"
}
# Function to stop Flink jobs based on deployment mode
stop_jobs() {
case $1 in
standalone)
- "$FLINK_HOME"/bin/flink list -r -m "$FLINK_JOB_MANAGER_ADDRESS" | grep "$job_name" | awk '{print $4}' | while read -r jobId
+ "$FLINK_HOME"/bin/flink list -r -m "$FLINK_STANDALONE_JOB_MANAGER_ADDRESS" | grep "$job_or_yarn_app_name" | awk '{print $4}' | while read -r jobId
do
- "$FLINK_HOME"/bin/flink cancel "$jobId" -m "$FLINK_JOB_MANAGER_ADDRESS"
+ "$FLINK_HOME"/bin/flink cancel "$jobId" -m "$FLINK_STANDALONE_JOB_MANAGER_ADDRESS"
echo "Stopped Flink job with JobID: $jobId"
done
;;
yarn-per-job)
# Command to stop YARN applications for the specified Yarn cluster app name
- yarn application -list -appStates RUNNING | grep "$job_name" | awk '{print $1}' | while read -r appId
+ yarn application -list -appStates RUNNING | grep "$job_or_yarn_app_name" | awk '{print $1}' | while read -r appId
do
yarn application -kill "$appId"
echo "Stopped YARN application with ApplicationID: $appId"
done
;;
yarn-session)
- "$FLINK_HOME"/bin/flink list -r -yid "$YARN_ADDRESS" | grep "$job_name" | awk '{print $4}' | while read -r jobId
+ "$FLINK_HOME"/bin/flink list -r -yid "$YARN_SESSION_CLUSTER_ID" | grep "$job_or_yarn_app_name" | awk '{print $4}' | while read -r jobId
do
- "$FLINK_HOME"/bin/flink cancel "$jobId" -yid "$YARN_ADDRESS"
+ "$FLINK_HOME"/bin/flink cancel "$jobId" -yid "$YARN_SESSION_CLUSTER_ID"
echo "Stopped Flink job with JobID: $jobId"
done
;;
@@ -63,16 +63,16 @@ fi
# Assigning input arguments to variables
deployment_mode=$1 # standalone, yarn-per-job, or yarn-session
-job_name=$2 # The Flink job name to stop
+job_or_yarn_app_name=$2 # Standalone job name or YARN application name
# Checking for empty input arguments
-if [ -z "$deployment_mode" ] || [ -z "$job_name" ]; then
+if [ -z "$deployment_mode" ] || [ -z "$job_or_yarn_app_name" ]; then
display_usage
exit 1
fi
# Execute stop_jobs function with provided arguments
-stop_jobs "$deployment_mode" "$job_name"
+stop_jobs "$deployment_mode" "$job_or_yarn_app_name"