-
Notifications
You must be signed in to change notification settings - Fork 501
Expand file tree
/
Copy path — s3ToRedshift.py
More file actions
38 lines (32 loc) · 1.07 KB
/
s3ToRedshift.py
File metadata and controls
38 lines (32 loc) · 1.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
from pgdb import connect
import os
import sys
from awsglue.utils import getResolvedOptions
# CONFIGURATION
# NOTE(review): credentials are hard-coded in this script — move them to AWS
# Secrets Manager or Glue job parameters before deploying; anyone who can read
# this file can read the Redshift password.
redshiftHost = "xyz.redshift.amazonaws.com"
redshiftPort = "5439"
redshiftDatabase = "mydatabase"
redshiftUser = "myadmin"
redshiftPassword = "XYZ"
redshiftSchema = "myschema"
redshifttable = "mytable"
redshiftColumns = "timestamp,value_a,value_b,value_c"
# Redshift COPY accepts the two-character escape '\t' (backslash + t) as the
# TAB delimiter; keep it as the literal two characters so it survives
# interpolation into the quoted SQL string below.
DELIMITER = "\\t"
DATEFORMAT = "YYYY-MM-DD"

# ARGUMENTS
# getResolvedOptions is argparse-based: options requested with hyphens
# ("s3-bucket") are exposed under underscore keys ("s3_bucket"), which is why
# the lookup keys below differ from the requested names — TODO confirm against
# the Glue runtime version in use.
args = getResolvedOptions(sys.argv, ["s3-bucket", "s3-object"])
s3Bucket = args["s3_bucket"]
s3Object = args["s3_object"]

# Load the S3 object into the target table, making sure the cursor and the
# connection are released even when a statement fails mid-way.
con = connect(
    host=redshiftHost + ":" + redshiftPort,
    database=redshiftDatabase,
    user=redshiftUser,
    password=redshiftPassword,
)
try:
    cursor = con.cursor()
    try:
        # Abort any statement that runs longer than 6 minutes (value is ms).
        cursor.execute("set statement_timeout = 360000")
        # BUG FIX: the original query interpolated the delimiter and the date
        # format WITHOUT quotes ("delimiter <TAB>" / "DATEFORMAT AS
        # YYYY-MM-DD"), which is invalid COPY syntax — both parameters must be
        # single-quoted string literals.
        copyQuery = (
            f"COPY {redshiftSchema}.{redshifttable}({redshiftColumns}) "
            f"from 's3://{s3Bucket}/{s3Object}' "
            f"iam_role 'arn:aws:iam::111111111111:role/LoadFromS3ToRedshiftJob' "
            f"delimiter '{DELIMITER}' "
            f"DATEFORMAT AS '{DATEFORMAT}' "
            f"ROUNDEC TRUNCATECOLUMNS ESCAPE MAXERROR AS 500;"
        )
        cursor.execute(copyQuery)
        con.commit()
    finally:
        cursor.close()
finally:
    con.close()