暂无图片
暂无图片
暂无图片
暂无图片
暂无图片

dolphinscheduler配置datax

原创 huayumicheng 2023-06-24
4405


1、安装datax

[root@datax ~]# yum install java-1.8.0-openjdk java-1.8.0-openjdk-devel 
[root@datax ~]# wget https://datax-opensource.oss-cn-hangzhou.aliyuncs.com/202303/datax.tar.gz
[root@datax ~]# tar xzf datax.tar.gz -C /usr/local/
[root@datax ~]# chmod -R 755 /usr/local/datax
[root@datax ~]# echo "export PATH=\$PATH:/usr/local/datax/bin" >>/etc/profile
[root@datax ~]# source /etc/profile

2、datax同步MySQL数据

-- 新建源表
create database test1;
use test1;


CREATE TABLE `t_user_info` (
 `id` int(11) NOT NULL AUTO_INCREMENT,
 `name` varchar(255) DEFAULT NULL,
 `content` text,
 `createdate` datetime DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

INSERT INTO `test1`.`t_user_info` (`id`, `name`, `content`, `createdate`) VALUES ('1', 'linux重启mysql的命令', 'linux重启mysql的命令', '2020-04-19 21:18:58');
INSERT INTO `test1`.`t_user_info` (`id`, `name`, `content`, `createdate`) VALUES ('2', 'jQuery给input绑定回车事件', 'jQuery给input绑定回车事件', '2020-04-20 21:19:14');
INSERT INTO `test1`.`t_user_info` (`id`, `name`, `content`, `createdate`) VALUES ('3', 'Web开发Session超时设置', 'Web开发Session超时设置', '2020-04-21 21:19:19');


-- 新建目标表
create database test2;

use test2;

CREATE TABLE `t_user_info` (
 `id` int(11) NOT NULL AUTO_INCREMENT,
 `name` varchar(255) DEFAULT NULL,
 `content` text,
 `createdate` datetime DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;


-- 新建同步用户
create user test@'%' identified by '123456';
grant all on *.* to test@'%';
flush privileges;


--编写同步job
[root@datax ~]# cat /usr/local/datax/job/mysql_2_mysql.json
{
   "job": {
       "setting": {
           "speed": {
               "channel": 1
          }
      },
       "content": [
          {
               "reader": {
                   "name": "mysqlreader",
                   "parameter": {
                       "username": "test",
                       "password": "123456",
                       "column": [ "id", "name","content" ,"createdate"],
                       "splitPk": "id",
                       "connection": [
                          {
                               "table": [
                                   "t_user_info"
                              ],
                               "jdbcUrl": [
                                   "jdbc:mysql://192.168.100.82:3306/test1?useSSL=false"
                              ]
                          }
                      ]
                  }
              },
               "writer": {
                   "name": "mysqlwriter",
                   "parameter": {
                       "writeMode": "insert",
                       "username": "test",
                       "password": "123456",
                       "column": [ "id", "name","content","createdate"],
                       "session": [
                           "set session sql_mode='ANSI'"
                      ],
                       "connection": [
                          {
                               "jdbcUrl": "jdbc:mysql://192.168.100.82:3306/test2?useSSL=false",
                               "table": [
                                   "t_user_info"
                              ]
                          }
                      ]
                  }
              }
          }
      ]
  }
}


--执行同步
python /usr/local/datax/bin/datax.py /usr/local/datax/job/mysql_2_mysql.json

3、伪分布式部署 DolphinScheduler 需要有外部软件的支持

  • JDK:下载JDK (1.8+),安装并配置 JAVA_HOME 环境变量,并将其下的 bin 目录追加到 PATH 环境变量中。如果你的环境中已存在,可以跳过这步。

  • 二进制包:在下载页面下载 DolphinScheduler 二进制包

  • 数据库:PostgreSQL (8.2.15+) 或者 MySQL (5.7+),两者任选其一即可,如 MySQL 则需要 JDBC Driver 8.0.16

  • 注册中心:ZooKeeper (3.4.6+),下载地址

  • 进程树分析

    • macOS安装pstree

    • Fedora/Red Hat/CentOS/Ubuntu/Debian安装psmisc

*注意:* DolphinScheduler 本身不依赖 Hadoop、Hive、Spark,但如果你运行的任务需要依赖他们,就需要有对应的环境支持

文档参考:https://dolphinscheduler.apache.org/zh-cn/docs/3.1.7/%E9%83%A8%E7%BD%B2%E6%8C%87%E5%8D%97_menu



4、安装zookeeper

--安装zookeeper
[root@datax ~]# wget https://archive.apache.org/dist/zookeeper/zookeeper-3.7.1/apache-zookeeper-3.7.1-bin.tar.gz
[root@datax ~]# tar xzf apache-zookeeper-3.7.1-bin.tar.gz -C /usr/local/
[root@datax ~]# ln -s /usr/local/apache-zookeeper-3.7.1-bin /usr/local/apache-zookeeper
[root@datax ~]# mkdir -p /usr/local/apache-zookeeper/{data,logs}
[root@datax ~]# cd /usr/local/apache-zookeeper/conf
[root@datax conf]# cp zoo_sample.cfg zoo.cfg  


--修改zoo.cfg配置文件
[root@datax ~]# vim /usr/local/apache-zookeeper/conf/zoo.cfg

dataDir=/usr/local/apache-zookeeper/data
dataLogDir=/usr/local/apache-zookeeper/logs


--修改日志文件目录

vim /usr/local/apache-zookeeper/bin/zkEnv.sh


if [ "x${ZOO_LOG_DIR}" = "x" ]
then
   ZOO_LOG_DIR="/usr/local/apache-zookeeper/logs"     # 修改日志目录
fi

--启动zookeeper
[root@datax ~]# cd /usr/local/apache-zookeeper/bin
[root@datax bin]# ./zkServer.sh start


--查看zookeeper状态
[root@datax bin]# ./zkServer.sh status
/usr/bin/java
ZooKeeper JMX enabled by default
Using config: /usr/local/apache-zookeeper/bin/../conf/zoo.cfg
Client port found: 2181. Client address: localhost. Client SSL: false.
Mode: standalone

5、MySQL数据库相关配置

mysql> CREATE DATABASE dolphinscheduler DEFAULT CHARACTER SET utf8mb4 DEFAULT COLLATE utf8mb4_general_ci;
mysql> CREATE USER 'dolphinscheduler'@'%' IDENTIFIED BY 'ds.2023';
mysql> GRANT ALL PRIVILEGES ON dolphinscheduler.* TO 'dolphinscheduler'@'%';
mysql> FLUSH PRIVILEGES;


6、安装dolphinscheduler

[root@datax ~]# useradd dolphinscheduler
[root@datax ~]# echo "dolphinscheduler" | passwd --stdin dolphinscheduler
[root@datax ~]# sed -i '$adolphinscheduler ALL=(ALL) NOPASSWD: ALL' /etc/sudoers
[root@datax ~]# sed -i 's/^Defaults[[:space:]]\+requiretty/#&/' /etc/sudoers

[root@datax ~]# wget https://archive.apache.org/dist/dolphinscheduler/3.1.7/apache-dolphinscheduler-3.1.7-bin.tar.gz
[root@datax ~]# tar xzf apache-dolphinscheduler-3.1.7-bin.tar.gz -C /usr/local/
[root@datax ~]# ln -s /usr/local/apache-dolphinscheduler-3.1.7-bin /usr/local/apache-dolphinscheduler
[root@datax ~]# chown -R dolphinscheduler:dolphinscheduler /usr/local/apache-dolphinscheduler*

--配置ssh密钥
[root@datax ~]# su - dolphinscheduler
[dolphinscheduler@datax ~]$ ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
[dolphinscheduler@datax ~]$ cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
[dolphinscheduler@datax ~]$ chmod 600 ~/.ssh/authorized_keys
[dolphinscheduler@datax ~]$ exit


--修改环境变量
[root@datax ~]# cd /usr/local/apache-dolphinscheduler/bin/env
[root@datax env]# ls
dolphinscheduler_env.sh install_env.sh

--文件 install_env.sh 描述了哪些机器将被安装 DolphinScheduler 以及每台机器对应安装哪些服务


[root@datax env]# egrep -v '^$|^#' install_env.sh
ips=${ips:-"192.168.100.82"}
sshPort=${sshPort:-"22"}
masters=${masters:-"192.168.100.82"}
workers=${workers:-"192.168.100.82:default"}
alertServer=${alertServer:-"192.168.100.82"}
apiServers=${apiServers:-"192.168.100.82"}
installPath=${installPath:-"/usr/local/apache-dolphinscheduler"}
deployUser=${deployUser:-"dolphinscheduler"}
zkRoot=${zkRoot:-"/dolphinscheduler"}



--文件 ./bin/env/dolphinscheduler_env.sh 描述了下列配置:
1、DolphinScheduler 的数据库配置,详细配置方法见初始化数据库
2、一些任务类型外部依赖路径或库文件,如 JAVA_HOME 和 SPARK_HOME都是在这里定义的
3、注册中心zookeeper
4、服务端相关配置,比如缓存,时区设置等

vim dolphinscheduler_env.sh

# JAVA_HOME, will use it to start DolphinScheduler server
export JAVA_HOME=${JAVA_HOME:-/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.372.b07-1.el7_9.x86_64/jre}        # 设置Java环境路径

# Database related configuration, set database type, username and password
export DATABASE=${DATABASE:-mysql}             # 设置MySQL数据源
export SPRING_PROFILES_ACTIVE=${DATABASE}
export SPRING_DATASOURCE_URL="jdbc:mysql://192.168.100.82:3306/dolphinscheduler?useUnicode=true&characterEncoding=UTF-8&useSSL=false&allowPublicKeyRetrieval=true"
export SPRING_DATASOURCE_USERNAME=dolphinscheduler
export SPRING_DATASOURCE_PASSWORD=ds.2023

# DolphinScheduler server related configuration
export SPRING_CACHE_TYPE=${SPRING_CACHE_TYPE:-none}
export SPRING_JACKSON_TIME_ZONE=${SPRING_JACKSON_TIME_ZONE:-Asia/Shanghai}   # 设置时区
export MASTER_FETCH_COMMAND_NUM=${MASTER_FETCH_COMMAND_NUM:-10}

# Registry center configuration, determines the type and link of the registry center
export REGISTRY_TYPE=${REGISTRY_TYPE:-zookeeper}
export REGISTRY_ZOOKEEPER_CONNECT_STRING=${REGISTRY_ZOOKEEPER_CONNECT_STRING:-192.168.100.82:2181}   # zookeeper地址

# Tasks related configurations, need to change the configuration if you use the related tasks.
export HADOOP_HOME=${HADOOP_HOME:-/opt/soft/hadoop}
export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/opt/soft/hadoop/etc/hadoop}
export SPARK_HOME1=${SPARK_HOME1:-/opt/soft/spark1}
export SPARK_HOME2=${SPARK_HOME2:-/opt/soft/spark2}
export PYTHON_HOME=${PYTHON_HOME:-/bin/python}                # 设置Python变量
export HIVE_HOME=${HIVE_HOME:-/opt/soft/hive}
export FLINK_HOME=${FLINK_HOME:-/opt/soft/flink}
export DATAX_HOME=${DATAX_HOME:-/usr/local/datax}            # 设置datax路径
export SEATUNNEL_HOME=${SEATUNNEL_HOME:-/opt/soft/seatunnel}
export CHUNJUN_HOME=${CHUNJUN_HOME:-/opt/soft/chunjun}

#export PATH=$HADOOP_HOME/bin:$SPARK_HOME1/bin:$SPARK_HOME2/bin:$PYTHON_HOME/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$SEATUNNEL_HOME/bin:$CHUNJUN_HOME/bin:$PATH
export PATH=$HADOOP_HOME/bin:$SPARK_HOME1/bin:$SPARK_HOME2/bin:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_HOME/bin:$SEATUNNEL_HOME/bin:$CHUNJUN_HOME/bin:$PATH



--复制mysql-connector-java

[root@datax ~]# cp mysql-connector-java-8.0.30.jar /usr/local/apache-dolphinscheduler/tools/libs/
[root@datax ~]# cp mysql-connector-java-8.0.30.jar /usr/local/apache-dolphinscheduler/master-server/libs/
[root@datax ~]# cp mysql-connector-java-8.0.30.jar /usr/local/apache-dolphinscheduler/worker-server/libs/
[root@datax ~]# cp mysql-connector-java-8.0.30.jar /usr/local/apache-dolphinscheduler/api-server/libs/
[root@datax ~]# cp mysql-connector-java-8.0.30.jar /usr/local/apache-dolphinscheduler/alert-server/libs/
[root@datax ~]# chown -R dolphinscheduler:dolphinscheduler /usr/local/apache-dolphinscheduler*


--初始化数据库
[root@datax ~]# cd /usr/local/apache-dolphinscheduler
[root@datax apache-dolphinscheduler]# bash tools/bin/upgrade-schema.sh


--查看MySQL数据库初始化是否成功

mysql> use dolphinscheduler
Database changed
mysql> show tables;
+----------------------------------------+
| Tables_in_dolphinscheduler             |
+----------------------------------------+
| qrtz_blob_triggers                     |
| qrtz_calendars                         |
| qrtz_cron_triggers                     |
| qrtz_fired_triggers                   |
| qrtz_job_details                       |
| qrtz_locks                             |
| qrtz_paused_trigger_grps               |
| qrtz_scheduler_state                   |
| qrtz_simple_triggers                   |
| qrtz_simprop_triggers                 |
| qrtz_triggers                         |
| t_ds_access_token                     |
| t_ds_alert                             |
| t_ds_alert_plugin_instance             |
| t_ds_alert_send_status                 |
| t_ds_alertgroup                       |
| t_ds_audit_log                         |
| t_ds_cluster                           |
| t_ds_command                           |
| t_ds_datasource                       |
| t_ds_dq_comparison_type               |
| t_ds_dq_execute_result                 |
| t_ds_dq_rule                           |
| t_ds_dq_rule_execute_sql               |
| t_ds_dq_rule_input_entry               |
| t_ds_dq_task_statistics_value         |
| t_ds_environment                       |
| t_ds_environment_worker_group_relation |
| t_ds_error_command                     |
| t_ds_fav_task                         |
| t_ds_k8s                               |
| t_ds_k8s_namespace                     |
| t_ds_plugin_define                     |
| t_ds_process_definition               |
| t_ds_process_definition_log           |
| t_ds_process_instance                 |
| t_ds_process_task_relation             |
| t_ds_process_task_relation_log         |
| t_ds_project                           |
| t_ds_queue                             |
| t_ds_relation_datasource_user         |
| t_ds_relation_namespace_user           |
| t_ds_relation_process_instance         |
| t_ds_relation_project_user             |
| t_ds_relation_resources_user           |
| t_ds_relation_rule_execute_sql         |
| t_ds_relation_rule_input_entry         |
| t_ds_relation_udfs_user               |
| t_ds_resources                         |
| t_ds_schedules                         |
| t_ds_session                           |
| t_ds_task_definition                   |
| t_ds_task_definition_log               |
| t_ds_task_group                       |
| t_ds_task_group_queue                 |
| t_ds_task_instance                     |
| t_ds_tenant                           |
| t_ds_udfs                             |
| t_ds_user                             |
| t_ds_version                           |
| t_ds_worker_group                     |
+----------------------------------------+
61 rows in set (0.00 sec)


--启动 DolphinScheduler

[root@datax ~]# su - dolphinscheduler
[dolphinscheduler@datax ~]$ cd /usr/local/apache-dolphinscheduler
[dolphinscheduler@datax apache-dolphinscheduler]$ bash ./bin/install.sh


--最终启动状态
====================== dolphinscheduler server config =============================
1.dolphinscheduler server node config hosts:[  192.168.100.82 ]
2.master server node config hosts:[  192.168.100.82 ]
3.worker server node config hosts:[  192.168.100.82:default ]
4.alert server node config hosts:[  192.168.100.82 ]
5.api server node config hosts:[  192.168.100.82 ]


====================== dolphinscheduler server status =============================
node server state


192.168.100.82 Begin status master-server......
master-server [ RUNNING ]
End status master-server.
192.168.100.82 Begin status worker-server......
worker-server [ RUNNING ]
End status worker-server.
192.168.100.82 Begin status alert-server......
alert-server [ RUNNING ]
End status alert-server.
192.168.100.82 Begin status api-server......
api-server [ RUNNING ]
End status api-server.




--登录 DolphinScheduler
浏览器访问地址 http://localhost:12345/dolphinscheduler/ui 即可登录系统UI。默认的用户名和密码是 admin/dolphinscheduler123
--临时更改密码为admin/tom.123

--启停服务

[root@datax ~]# su - dolphinscheduler
[dolphinscheduler@datax ~]$ cd /usr/local/apache-dolphinscheduler

# 一键停止集群所有服务
bash ./bin/stop-all.sh

# 一键开启集群所有服务
bash ./bin/start-all.sh

# 启停 Master
bash ./bin/dolphinscheduler-daemon.sh stop master-server
bash ./bin/dolphinscheduler-daemon.sh start master-server

# 启停 Worker
bash ./bin/dolphinscheduler-daemon.sh start worker-server
bash ./bin/dolphinscheduler-daemon.sh stop worker-server

# 启停 Api
bash ./bin/dolphinscheduler-daemon.sh start api-server
bash ./bin/dolphinscheduler-daemon.sh stop api-server

# 启停 Alert
bash ./bin/dolphinscheduler-daemon.sh start alert-server
bash ./bin/dolphinscheduler-daemon.sh stop alert-server
「喜欢这篇文章,您的关注和赞赏是给作者最好的鼓励」
关注作者
【版权声明】本文为墨天轮用户原创内容,转载时必须标注文章的来源(墨天轮),文章链接,文章作者等基本信息,否则作者和墨天轮有权追究责任。如果您发现墨天轮中有涉嫌抄袭或者侵权的内容,欢迎发送邮件至:contact@modb.pro进行举报,并提供相关证据,一经查实,墨天轮将立刻删除相关内容。

评论

Rock Yan
暂无图片
关注
暂无图片
获得了210次点赞
暂无图片
内容获得59次评论
暂无图片
获得了345次收藏