1、安装datax
[root@datax ~]# yum install java-1.8.0-openjdk java-1.8.0-openjdk-devel
[root@datax ~]# wget https://datax-opensource.oss-cn-hangzhou.aliyuncs.com/202303/datax.tar.gz
[root@datax ~]# tar xzf datax.tar.gz -C /usr/local/
[root@datax ~]# chmod -R 755 /usr/local/datax
[root@datax ~]# echo "export PATH=\$PATH:/usr/local/datax/bin" >>/etc/profile
[root@datax ~]# source /etc/profile
2、datax同步MySQL数据
-- 新建源表
create database test1;
use test1;
CREATE TABLE `t_user_info` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`name` varchar(255) DEFAULT NULL,
`content` text,
`createdate` datetime DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
INSERT INTO `test1`.`t_user_info` (`id`, `name`, `content`, `createdate`) VALUES ('1', 'linux重启mysql的命令', 'linux重启mysql的命令', '2020-04-19 21:18:58');
INSERT INTO `test1`.`t_user_info` (`id`, `name`, `content`, `createdate`) VALUES ('2', 'jQuery给input绑定回车事件', 'jQuery给input绑定回车事件', '2020-04-20 21:19:14');
INSERT INTO `test1`.`t_user_info` (`id`, `name`, `content`, `createdate`) VALUES ('3', 'Web开发Session超时设置', 'Web开发Session超时设置', '2020-04-21 21:19:19');
-- 新建目标表
create database test2;
use test2;
CREATE TABLE `t_user_info` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`name` varchar(255) DEFAULT NULL,
`content` text,
`createdate` datetime DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
-- 新建同步用户
create user test@'%' identified by '123456';
grant all on *.* to test@'%';
flush privileges;
--编写同步job
[root@datax ~]# cat /usr/local/datax/job/mysql_2_mysql.json
{
"job": {
"setting": {
"speed": {
"channel": 1
}
},
"content": [
{
"reader": {
"name": "mysqlreader",
"parameter": {
"username": "test",
"password": "123456",
"column": [ "id", "name","content" ,"createdate"],
"splitPk": "id",
"connection": [
{
"table": [
"t_user_info"
],
"jdbcUrl": [
"jdbc:mysql://192.168.100.82:3306/test1?useSSL=false"
]
}
]
}
},
"writer": {
"name": "mysqlwriter",
"parameter": {
"writeMode": "insert",
"username": "test",
"password": "123456",
"column": [ "id", "name","content","createdate"],
"session": [
"set session sql_mode='ANSI'"
],
"connection": [
{
"jdbcUrl": "jdbc:mysql://192.168.100.82:3306/test2?useSSL=false",
"table": [
"t_user_info"
]
}
]
}
}
}
]
}
}
--执行同步
python /usr/local/datax/bin/datax.py /usr/local/datax/job/mysql_2_mysql.json
3、伪分布式部署 DolphinScheduler 需要有外部软件的支持
JDK:下载JDK (1.8+),安装并配置 JAVA_HOME 环境变量,并将其下的 bin 目录追加到 PATH 环境变量中。如果你的环境中已存在,可以跳过这步。
二进制包:在下载页面下载 DolphinScheduler 二进制包
数据库:PostgreSQL (8.2.15+) 或者 MySQL (5.7+),两者任选其一即可,如 MySQL 则需要 JDBC Driver 8.0.16
进程树分析:macOS 安装 pstree;Fedora/Red Hat/CentOS/Ubuntu/Debian 安装 psmisc
*注意:* DolphinScheduler 本身不依赖 Hadoop、Hive、Spark,但如果你运行的任务需要依赖他们,就需要有对应的环境支持
文档参考:https://dolphinscheduler.apache.org/zh-cn/docs/3.1.7/%E9%83%A8%E7%BD%B2%E6%8C%87%E5%8D%97_menu
4、安装zookeeper
--安装zookeeper
[root@datax ~]# wget https://archive.apache.org/dist/zookeeper/zookeeper-3.7.1/apache-zookeeper-3.7.1-bin.tar.gz
[root@datax ~]# tar xzf apache-zookeeper-3.7.1-bin.tar.gz -C /usr/local/
[root@datax ~]# ln -s /usr/local/apache-zookeeper-3.7.1-bin /usr/local/apache-zookeeper
[root@datax ~]# mkdir -p /usr/local/apache-zookeeper/{data,logs}
[root@datax ~]# cd /usr/local/apache-zookeeper/conf
[root@datax conf]# cp zoo_sample.cfg zoo.cfg
--修改zoo.cfg配置文件
vim /usr/local/apache-zookeeper/conf/zoo.cfg
dataDir=/usr/local/apache-zookeeper/data
dataLogDir=/usr/local/apache-zookeeper/logs
--修改日志文件目录
vim /usr/local/apache-zookeeper/bin/zkEnv.sh
if [ "x${ZOO_LOG_DIR}" = "x" ]
then
ZOO_LOG_DIR="/usr/local/apache-zookeeper/logs" # 修改日志目录
fi
--启动zookeeper
[root@datax ~]# cd /usr/local/apache-zookeeper/bin
[root@datax bin]# ./zkServer.sh start
--查看zookeeper状态
[root@datax bin]# ./zkServer.sh status
/usr/bin/java
ZooKeeper JMX enabled by default
Using config: /usr/local/apache-zookeeper/bin/../conf/zoo.cfg
Client port found: 2181. Client address: localhost. Client SSL: false.
Mode: standalone
5、MySQL数据库相关配置
mysql> CREATE DATABASE dolphinscheduler DEFAULT CHARACTER SET utf8mb4 DEFAULT COLLATE utf8mb4_general_ci;
mysql> CREATE USER 'dolphinscheduler'@'%' IDENTIFIED BY 'ds.2023';
mysql> GRANT ALL PRIVILEGES ON dolphinscheduler.* TO 'dolphinscheduler'@'%';
mysql> FLUSH PRIVILEGES;
6、安装dolphinscheduler
[root@datax ~]# useradd dolphinscheduler
[root@datax ~]# echo "dolphinscheduler" | passwd --stdin dolphinscheduler
[root@datax ~]# sed -i '$adolphinscheduler ALL=(ALL) NOPASSWD: ALL' /etc/sudoers
[root@datax ~]# sed -i -E 's/^Defaults[[:space:]]+requiretty/#&/' /etc/sudoers
[root@datax ~]# wget --no-check-certificate https://dlcdn.apache.org/dolphinscheduler/3.1.7/apache-dolphinscheduler-3.1.7-bin.tar.gz
[root@datax ~]# tar xzf apache-dolphinscheduler-3.1.7-bin.tar.gz -C /usr/local/
[root@datax ~]# ln -s /usr/local/apache-dolphinscheduler-3.1.7-bin /usr/local/apache-dolphinscheduler
[root@datax ~]# chown -R dolphinscheduler.dolphinscheduler /usr/local/apache-dolphinscheduler*
--配置ssh密钥
[root@datax ~]# su - dolphinscheduler
[dolphinscheduler@datax ~]$ ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
[dolphinscheduler@datax ~]$ cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
[dolphinscheduler@datax ~]$ chmod 600 ~/.ssh/authorized_keys
[dolphinscheduler@datax ~]$ exit
--修改环境变量
[root@datax ~]# cd /usr/local/apache-dolphinscheduler/bin/env
[root@datax env]# ls
dolphinscheduler_env.sh install_env.sh
--文件 install_env.sh 描述了哪些机器将被安装 DolphinScheduler 以及每台机器对应安装哪些服务
[root@datax env]# egrep -v '^$|^#' install_env.sh
ips=${ips:-"192.168.100.82"}
sshPort=${sshPort:-"22"}
masters=${masters:-"192.168.100.82"}
workers=${workers:-"192.168.100.82:default"}
alertServer=${alertServer:-"192.168.100.82"}
apiServers=${apiServers:-"192.168.100.82"}
installPath=${installPath:-"/usr/local/apache-dolphinscheduler"}
deployUser=${deployUser:-"dolphinscheduler"}
zkRoot=${zkRoot:-"/dolphinscheduler"}
--文件 ./bin/env/dolphinscheduler_env.sh 描述了下列配置:
1、DolphinScheduler 的数据库配置,详细配置方法见初始化数据库
2、一些任务类型外部依赖路径或库文件,如 JAVA_HOME 和 SPARK_HOME都是在这里定义的
3、注册中心zookeeper
4、服务端相关配置,比如缓存,时区设置等
vim dolphinscheduler_env.sh
# JAVA_HOME, will use it to start DolphinScheduler server
export JAVA_HOME=${JAVA_HOME:-/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.372.b07-1.el7_9.x86_64/jre} # 设置Java环境路径
# Database related configuration, set database type, username and password
export DATABASE=${DATABASE:-mysql} # 设置MySQL数据源
export SPRING_PROFILES_ACTIVE=${DATABASE}
export SPRING_DATASOURCE_URL="jdbc:mysql://192.168.100.82:3306/dolphinscheduler?useUnicode=true&characterEncoding=UTF-8&useSSL=false&allowPublicKeyRetrieval=true"
export SPRING_DATASOURCE_USERNAME=dolphinscheduler
export SPRING_DATASOURCE_PASSWORD=ds.2023
# DolphinScheduler server related configuration
export SPRING_CACHE_TYPE=${SPRING_CACHE_TYPE:-none}
export SPRING_JACKSON_TIME_ZONE=${SPRING_JACKSON_TIME_ZONE:-Asia/Shanghai} # 设置时区
export MASTER_FETCH_COMMAND_NUM=${MASTER_FETCH_COMMAND_NUM:-10}
# Registry center configuration, determines the type and link of the registry center
export REGISTRY_TYPE=${REGISTRY_TYPE:-zookeeper}
export REGISTRY_ZOOKEEPER_CONNECT_STRING=${REGISTRY_ZOOKEEPER_CONNECT_STRING:-192.168.100.82:2181} # zookeeper地址
# Tasks related configurations, need to change the configuration if you use the related tasks.
export HADOOP_HOME=${HADOOP_HOME:-/opt/soft/hadoop}
export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/opt/soft/hadoop/etc/hadoop}
export SPARK_HOME1=${SPARK_HOME1:-/opt/soft/spark1}
export SPARK_HOME2=${SPARK_HOME2:-/opt/soft/spark2}
export PYTHON_HOME=${PYTHON_HOME:-/bin/python} # 设置Python变量
export HIVE_HOME=${HIVE_HOME:-/opt/soft/hive}
export FLINK_HOME=${FLINK_HOME:-/opt/soft/flink}
export DATAX_HOME=${DATAX_HOME:-/usr/local/datax} # 设置datax路径
export SEATUNNEL_HOME=${SEATUNNEL_HOME:-/opt/soft/seatunnel}
export CHUNJUN_HOME=${CHUNJUN_HOME:-/opt/soft/chunjun}
#export PATH=$HADOOP_HOME/bin:$SPARK_HOME1/bin:$SPARK_HOME2/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$SEATUNNEL_HOME/bin:$CHUNJUN_HOME/bin:$PATH
export PATH=$HADOOP_HOME/bin:$SPARK_HOME1/bin:$SPARK_HOME2/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$SEATUNNEL_HOME/bin:$CHUNJUN_HOME/bin:$PATH
--复制mysql-connector-java
[root@datax ~]# cp mysql-connector-java-8.0.30.jar /usr/local/apache-dolphinscheduler/tools/libs/
[root@datax ~]# cp mysql-connector-java-8.0.30.jar /usr/local/apache-dolphinscheduler/master-server/libs/
[root@datax ~]# cp mysql-connector-java-8.0.30.jar /usr/local/apache-dolphinscheduler/worker-server/libs/
[root@datax ~]# cp mysql-connector-java-8.0.30.jar /usr/local/apache-dolphinscheduler/api-server/libs/
[root@datax ~]# cp mysql-connector-java-8.0.30.jar /usr/local/apache-dolphinscheduler/alert-server/libs/
[root@datax ~]# chown -R dolphinscheduler.dolphinscheduler /usr/local/apache-dolphinscheduler*
--初始化数据库
[root@datax ~]# cd /usr/local/apache-dolphinscheduler
[root@datax apache-dolphinscheduler]# bash tools/bin/upgrade-schema.sh
--查看MySQL数据库初始化是否成功
mysql> use dolphinscheduler
Database changed
mysql> show tables;
+----------------------------------------+
| Tables_in_dolphinscheduler |
+----------------------------------------+
| qrtz_blob_triggers |
| qrtz_calendars |
| qrtz_cron_triggers |
| qrtz_fired_triggers |
| qrtz_job_details |
| qrtz_locks |
| qrtz_paused_trigger_grps |
| qrtz_scheduler_state |
| qrtz_simple_triggers |
| qrtz_simprop_triggers |
| qrtz_triggers |
| t_ds_access_token |
| t_ds_alert |
| t_ds_alert_plugin_instance |
| t_ds_alert_send_status |
| t_ds_alertgroup |
| t_ds_audit_log |
| t_ds_cluster |
| t_ds_command |
| t_ds_datasource |
| t_ds_dq_comparison_type |
| t_ds_dq_execute_result |
| t_ds_dq_rule |
| t_ds_dq_rule_execute_sql |
| t_ds_dq_rule_input_entry |
| t_ds_dq_task_statistics_value |
| t_ds_environment |
| t_ds_environment_worker_group_relation |
| t_ds_error_command |
| t_ds_fav_task |
| t_ds_k8s |
| t_ds_k8s_namespace |
| t_ds_plugin_define |
| t_ds_process_definition |
| t_ds_process_definition_log |
| t_ds_process_instance |
| t_ds_process_task_relation |
| t_ds_process_task_relation_log |
| t_ds_project |
| t_ds_queue |
| t_ds_relation_datasource_user |
| t_ds_relation_namespace_user |
| t_ds_relation_process_instance |
| t_ds_relation_project_user |
| t_ds_relation_resources_user |
| t_ds_relation_rule_execute_sql |
| t_ds_relation_rule_input_entry |
| t_ds_relation_udfs_user |
| t_ds_resources |
| t_ds_schedules |
| t_ds_session |
| t_ds_task_definition |
| t_ds_task_definition_log |
| t_ds_task_group |
| t_ds_task_group_queue |
| t_ds_task_instance |
| t_ds_tenant |
| t_ds_udfs |
| t_ds_user |
| t_ds_version |
| t_ds_worker_group |
+----------------------------------------+
61 rows in set (0.00 sec)
--启动 DolphinScheduler
[root@datax ~]# su - dolphinscheduler
[dolphinscheduler@datax ~]$ cd /usr/local/apache-dolphinscheduler
[dolphinscheduler@datax apache-dolphinscheduler]$ bash ./bin/install.sh
--最终启动状态
====================== dolphinscheduler server config =============================
1.dolphinscheduler server node config hosts:[ 192.168.100.82 ]
2.master server node config hosts:[ 192.168.100.82 ]
3.worker server node config hosts:[ 192.168.100.82:default ]
4.alert server node config hosts:[ 192.168.100.82 ]
5.api server node config hosts:[ 192.168.100.82 ]
====================== dolphinscheduler server status =============================
node server state
192.168.100.82 Begin status master-server......
master-server [ RUNNING ]
End status master-server.
192.168.100.82 Begin status worker-server......
worker-server [ RUNNING ]
End status worker-server.
192.168.100.82 Begin status alert-server......
alert-server [ RUNNING ]
End status alert-server.
192.168.100.82 Begin status api-server......
api-server [ RUNNING ]
End status api-server.
--登录 DolphinScheduler
浏览器访问地址 http://localhost:12345/dolphinscheduler/ui 即可登录系统UI。默认的用户名和密码是 admin/dolphinscheduler123
--临时更改密码为admin/tom.123
--启停服务
[root@datax ~]# su - dolphinscheduler
[dolphinscheduler@datax ~]$ cd /usr/local/apache-dolphinscheduler
# 一键停止集群所有服务
bash ./bin/stop-all.sh
# 一键开启集群所有服务
bash ./bin/start-all.sh
# 启停 Master
bash ./bin/dolphinscheduler-daemon.sh stop master-server
bash ./bin/dolphinscheduler-daemon.sh start master-server
# 启停 Worker
bash ./bin/dolphinscheduler-daemon.sh start worker-server
bash ./bin/dolphinscheduler-daemon.sh stop worker-server
# 启停 Api
bash ./bin/dolphinscheduler-daemon.sh start api-server
bash ./bin/dolphinscheduler-daemon.sh stop api-server
# 启停 Alert
bash ./bin/dolphinscheduler-daemon.sh start alert-server
bash ./bin/dolphinscheduler-daemon.sh stop alert-server
「喜欢这篇文章,您的关注和赞赏是给作者最好的鼓励」
关注作者
【版权声明】本文为墨天轮用户原创内容,转载时必须标注文章的来源(墨天轮),文章链接,文章作者等基本信息,否则作者和墨天轮有权追究责任。如果您发现墨天轮中有涉嫌抄袭或者侵权的内容,欢迎发送邮件至:contact@modb.pro进行举报,并提供相关证据,一经查实,墨天轮将立刻删除相关内容。