-
Notifications
You must be signed in to change notification settings - Fork 500
Expand file tree
/
Copy pathemail_scrape.py
More file actions
95 lines (86 loc) · 2.5 KB
/
email_scrape.py
File metadata and controls
95 lines (86 loc) · 2.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import csv
import json
import os

import click
import requests
def run_query(token, repo, timeout=30):
    """Send the commit-author GraphQL query for ``repo`` to the GitHub API.

    Args:
        token: GitHub API token, sent in the ``Authorization`` header.
        repo: Repository in ``<owner>/<name>`` form.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON body of the response (``res.json()``).

    Raises:
        ValueError: If ``repo`` is not of the form ``<owner>/<name>``.
        Exception: If the API answers with a status code other than 200.
    """
    parts = repo.split("/")
    # Validate up front: a missing or empty owner/name would otherwise surface
    # as a bare IndexError or as a confusing API-side failure.
    if len(parts) < 2 or not parts[0] or not parts[1]:
        raise ValueError(
            "repo must be given as '<owner>/<name>', got {!r}".format(repo)
        )
    owner, name = parts[0], parts[1]

    query = query_string(name, owner)
    headers = {"Authorization": f"token {token}"}
    # The query travels in the JSON body under the "query" key. Without an
    # explicit timeout, requests would wait indefinitely on a stalled
    # connection.
    res = requests.post(
        "https://api.github.com/graphql",
        json={"query": query},
        headers=headers,
        timeout=timeout,
    )
    if res.status_code == 200:
        return res.json()
    raise Exception(
        "Query failed to run by returning code of {}".format(res.status_code)
    )
def query_string(name, owner):
    """Build the GraphQL query listing commit authors of a repository.

    The query reads the commit history reachable from the ``master`` branch
    (the branch name is hard-coded here) and asks, for each commit, for the
    author's name and email plus the name of the linked user.

    Args:
        name: Repository name (a string).
        owner: Login of the repository owner (a string).

    Returns:
        The query document as a string.
    """
    # json.dumps yields a double-quoted literal with quotes and backslashes
    # escaped, so unusual characters in the arguments cannot break out of the
    # GraphQL string literal. For ordinary names the result is simply "name".
    # `first: 100` is included because GitHub's GraphQL API expects a
    # `first`/`last` argument (1-100) on connections; only this first page is
    # requested, `hasNextPage` reports whether more commits exist.
    query = """
    {
      repository(name: %s, owner: %s) {
        ref(qualifiedName: "master") {
          target {
            ... on Commit {
              id
              history(first: 100) {
                pageInfo {
                  hasNextPage
                }
                edges {
                  node {
                    author {
                      name
                      email
                      user {
                        name
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
    """ % (
        json.dumps(name, ensure_ascii=False),
        json.dumps(owner, ensure_ascii=False),
    )
    return query
def _clean_email(email):
    """Return ``email`` with GitHub's ``<id>+`` noreply prefix removed."""
    if not email:
        return ""
    # Only noreply addresses carry the "<id>+<login>@..." prefix; a "+" in any
    # other address is ordinary plus-addressing and must be kept intact.
    if email.endswith("@users.noreply.github.com"):
        return email.split("+")[-1]
    return email


@click.command()
@click.argument("token", required=True, type=str)
@click.argument("repo", required=True, type=str)
def main(token, repo):
    """
    Fetch the name, username and email of the commit authors of REPO
    (given as <owner>/<repo_name>) with the GitHub API TOKEN and store
    them in commits_email.csv in the current directory.

    usage:
        python email_scrape.py <token> <owner>/<repo_name>
    """
    # Query before touching the file so that a failed request does not
    # truncate a CSV left by an earlier run.
    result = run_query(token, repo)
    repository = (result.get("data") or {}).get("repository") or {}
    ref = repository.get("ref")
    if not ref:
        # The API can answer 200 with no usable data (e.g. unknown repository
        # or no "master" branch); report that instead of failing on None.
        detail = result.get("errors") or "repository or 'master' branch not found"
        raise click.ClickException(
            f"No commit history available for {repo!r}: {detail}"
        )
    edges = ref["target"]["history"]["edges"]

    # newline="" lets the csv module control line endings (avoids blank rows
    # on Windows); utf-8 keeps non-ASCII author names writable everywhere.
    with open("commits_email.csv", "w", newline="", encoding="utf-8") as new_file:
        fieldnames = ["Name", "Username", "Email"]
        csv_writer = csv.DictWriter(new_file, fieldnames=fieldnames, delimiter=",")
        csv_writer.writeheader()

        unique_user_name = set()
        for edge in edges:
            author = edge["node"]["author"] or {}
            # "Username" holds the commit author's name and is also the
            # de-duplication key; "Name" holds the linked user's name.
            user_name = author.get("name") or ""
            if user_name in unique_user_name:
                continue
            unique_user_name.add(user_name)
            # `user` is null when the commit is not linked to an account;
            # missing values are written as empty cells.
            user = author.get("user") or {}
            csv_writer.writerow(
                {
                    "Name": user.get("name") or "",
                    "Username": user_name,
                    "Email": _clean_email(author.get("email")),
                }
            )

    click.secho(
        f"\n-> 👍 Successfully Saved at {os.path.abspath(os.getcwd())}",
        fg="green",
        bold=True,
    )
# Run the click command only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()