Showing
3 changed files
with
37 additions
and
18 deletions
... | @@ -46,17 +46,15 @@ Recommended Commit Message : Remove unused imports | ... | @@ -46,17 +46,15 @@ Recommended Commit Message : Remove unused imports |
46 | To solve this problem, use a new embedding called [`patch_type_embeddings`](https://github.com/graykode/commit-autosuggestions/blob/master/commit/model/diff_roberta.py#L40) that can distinguish added lines from deleted lines, just as Lample et al., 2019 (XLM) used language embeddings. (1 for added, 2 for deleted.) | 46 | To solve this problem, use a new embedding called [`patch_type_embeddings`](https://github.com/graykode/commit-autosuggestions/blob/master/commit/model/diff_roberta.py#L40) that can distinguish added lines from deleted lines, just as Lample et al., 2019 (XLM) used language embeddings. (1 for added, 2 for deleted.) |
47 | 47 | ||
48 | ### Language support | 48 | ### Language support |
49 | -| Language | Added | Diff | | 49 | +| Language | Added | Diff | Data(Diff) | Weights | |
50 | -| :------------- | :---: | :---:| | 50 | +| :------------- | :---: | :---:| :---: | :---:| |
51 | -| Python | ✅ | ✅ | | 51 | +| Python | ✅ | ✅ | [link](https://drive.google.com/drive/folders/1_8lQmzTH95Nc-4MKd1RP3x4BVc8tBA6W?usp=sharing) | [link](https://drive.google.com/drive/folders/1OwM7_FiLiwVJAhAanBPWtPw3Hz3Dszbh?usp=sharing) | |
52 | -| JavaScript | ⬜ | ⬜ | | 52 | +| JavaScript | ⬜ | ⬜ | ⬜ | ⬜ | |
53 | -| Go | ⬜ | ⬜ | | 53 | +| Go | ⬜ | ⬜ | ⬜ | ⬜ | |
54 | -| JAVA | ⬜ | ⬜ | | 54 | +| JAVA | ⬜ | ⬜ | ⬜ | ⬜ | |
55 | -| Ruby | ⬜ | ⬜ | | 55 | +| Ruby | ⬜ | ⬜ | ⬜ | ⬜ | |
56 | -| PHP | ⬜ | ⬜ | | 56 | +| PHP | ⬜ | ⬜ | ⬜ | ⬜ | |
57 | * ✅ — Supported | 57 | * ✅ — Supported |
58 | -* 🔶 — Partial support | ||
59 | -* 🚧 — Under development | ||
60 | * ⬜ - N/A ️ | 58 | * ⬜ - N/A ️ |
61 | 59 | ||
62 | We plan to gradually add support for the languages that are not currently supported. However, training models for the languages above requires expensive GPU instances on AWS or GCP. Please consider sponsoring this project! | 60 | We plan to gradually add support for the languages that are not currently supported. However, training models for the languages above requires expensive GPU instances on AWS or GCP. Please consider sponsoring this project! |
... | @@ -68,9 +66,18 @@ To run this project, you need a flask-based inference server (GPU) and a client | ... | @@ -68,9 +66,18 @@ To run this project, you need a flask-based inference server (GPU) and a client |
68 | Prepare Docker and Nvidia-docker before running the server. | 66 | Prepare Docker and Nvidia-docker before running the server. |
69 | 67 | ||
70 | ##### 1-a. If you have GPU machine. | 68 | ##### 1-a. If you have GPU machine. |
71 | -Serve flask server with Nvidia Docker | 69 | +Serve the Flask server with Nvidia Docker. Check the Docker tag for each programming language [here](https://hub.docker.com/repository/registry-1.docker.io/graykode/commit-autosuggestions/tags). |
70 | +| Language | Tag | | ||
71 | +| :------------- | :---: | | ||
72 | +| Python | py | | ||
73 | +| JavaScript | js | | ||
74 | +| Go | go | | ||
75 | +| JAVA | java | | ||
76 | +| Ruby | ruby | | ||
77 | +| PHP | php | | ||
78 | + | ||
72 | ```shell script | 79 | ```shell script |
73 | -$ docker run -it --gpus 0 -p 5000:5000 commit-autosuggestions:0.1-gpu | 80 | +$ docker run -it -d --gpus 0 -p 5000:5000 graykode/commit-autosuggestions:{language} |
74 | ``` | 81 | ``` |
75 | 82 | ||
76 | ##### 1-b. If you don't have GPU machine. | 83 | ##### 1-b. If you don't have GPU machine. | ... | ... |
... | @@ -10,14 +10,14 @@ ARG ADDED_MODEL="1YrkwfM-0VBCJaa9NYaXUQPODdGPsmQY4" | ... | @@ -10,14 +10,14 @@ ARG ADDED_MODEL="1YrkwfM-0VBCJaa9NYaXUQPODdGPsmQY4" |
10 | ARG DIFF_MODEL="1--gcVVix92_Fp75A-mWH0pJS0ahlni5m" | 10 | ARG DIFF_MODEL="1--gcVVix92_Fp75A-mWH0pJS0ahlni5m" |
11 | 11 | ||
12 | RUN git clone https://github.com/graykode/commit-autosuggestions.git /app/commit-autosuggestions \ | 12 | RUN git clone https://github.com/graykode/commit-autosuggestions.git /app/commit-autosuggestions \ |
13 | - && cd /app/commit-autosuggestions && python3 setup.py install | 13 | + && cd /app/commit-autosuggestions |
14 | 14 | ||
15 | WORKDIR /app/commit-autosuggestions | 15 | WORKDIR /app/commit-autosuggestions |
16 | 16 | ||
17 | RUN pip3 install ${PYTORCH_WHEEL} gdown | 17 | RUN pip3 install ${PYTORCH_WHEEL} gdown |
18 | -RUN gdown https://drive.google.com/uc?id=${ADDED_MODEL} -O weight/added/ | 18 | +RUN gdown https://drive.google.com/uc?id=${ADDED_MODEL} -O weight/python/added/ |
19 | -RUN gdown https://drive.google.com/uc?id=${DIFF_MODEL} -O weight/diff/ | 19 | +RUN gdown https://drive.google.com/uc?id=${DIFF_MODEL} -O weight/python/diff/ |
20 | 20 | ||
21 | RUN pip3 install -r requirements.txt | 21 | RUN pip3 install -r requirements.txt |
22 | 22 | ||
23 | -ENTRYPOINT ["python3", "app.py"] | 23 | +ENTRYPOINT ["python3", "app.py", "--load_model_path", "./weights/python/"] | ... | ... |
... | @@ -24,6 +24,15 @@ from multiprocessing.pool import Pool | ... | @@ -24,6 +24,15 @@ from multiprocessing.pool import Pool |
24 | from transformers import RobertaTokenizer | 24 | from transformers import RobertaTokenizer |
25 | from pydriller import RepositoryMining | 25 | from pydriller import RepositoryMining |
26 | 26 | ||
27 | +language = { | ||
28 | + 'py' : ['.py'], | ||
29 | + 'js' : ['.js', '.ts'], | ||
30 | + 'go' : ['.go'], | ||
31 | + 'java' : ['.java'], | ||
32 | + 'ruby' : ['.rb'], | ||
33 | + 'php' : ['.php'] | ||
34 | +} | ||
35 | + | ||
27 | def message_cleaner(message): | 36 | def message_cleaner(message): |
28 | msg = message.split("\n")[0] | 37 | msg = message.split("\n")[0] |
29 | msg = re.sub(r"(\(|)#([0-9])+(\)|)", "", msg) | 38 | msg = re.sub(r"(\(|)#([0-9])+(\)|)", "", msg) |
... | @@ -34,7 +43,7 @@ def jobs(repo, args): | ... | @@ -34,7 +43,7 @@ def jobs(repo, args): |
34 | repo_path = os.path.join(args.repos_dir, repo) | 43 | repo_path = os.path.join(args.repos_dir, repo) |
35 | if os.path.exists(repo_path): | 44 | if os.path.exists(repo_path): |
36 | for commit in RepositoryMining( | 45 | for commit in RepositoryMining( |
37 | - repo_path, only_modifications_with_file_types=['.py'] | 46 | + repo_path, only_modifications_with_file_types=language[args.lang] |
38 | ).traverse_commits(): | 47 | ).traverse_commits(): |
39 | cleaned_message = message_cleaner(commit.msg) | 48 | cleaned_message = message_cleaner(commit.msg) |
40 | tokenized_message = args.tokenizer.tokenize(cleaned_message) | 49 | tokenized_message = args.tokenizer.tokenize(cleaned_message) |
... | @@ -44,7 +53,7 @@ def jobs(repo, args): | ... | @@ -44,7 +53,7 @@ def jobs(repo, args): |
44 | for mod in commit.modifications: | 53 | for mod in commit.modifications: |
45 | if not (mod.old_path and mod.new_path): | 54 | if not (mod.old_path and mod.new_path): |
46 | continue | 55 | continue |
47 | - if os.path.splitext(mod.new_path)[1] != '.py': | 56 | + if os.path.splitext(mod.new_path)[1] not in language[args.lang]: |
48 | continue | 57 | continue |
49 | if not mod.diff_parsed["added"]: | 58 | if not mod.diff_parsed["added"]: |
50 | continue | 59 | continue |
... | @@ -121,6 +130,9 @@ if __name__ == "__main__": | ... | @@ -121,6 +130,9 @@ if __name__ == "__main__": |
121 | help="directory that all repositories had been downloaded.",) | 130 | help="directory that all repositories had been downloaded.",) |
122 | parser.add_argument("--output_dir", type=str, required=True, | 131 | parser.add_argument("--output_dir", type=str, required=True, |
123 | help="The output directory where the preprocessed data will be written.") | 132 | help="The output directory where the preprocessed data will be written.") |
133 | + parser.add_argument("--lang", type=str, required=True, | ||
134 | + choices=['py', 'js', 'go', 'java', 'ruby', 'php'], | ||
135 | + help="The output directory where the preprocessed data will be written.") | ||
124 | parser.add_argument("--tokenizer_name", type=str, | 136 | parser.add_argument("--tokenizer_name", type=str, |
125 | default="microsoft/codebert-base", help="The name of tokenizer",) | 137 | default="microsoft/codebert-base", help="The name of tokenizer",) |
126 | parser.add_argument("--num_workers", default=4, type=int, help="number of process") | 138 | parser.add_argument("--num_workers", default=4, type=int, help="number of process") | ... | ... |
-
Please register or login to post a comment