Skip to content

Commit

Permalink
Add ruff and perform initial reformat #512 #515
Browse files Browse the repository at this point in the history
Signed-off-by: Jono Yang <[email protected]>
  • Loading branch information
JonoYang committed Aug 13, 2024
1 parent f164569 commit 57f9cc0
Show file tree
Hide file tree
Showing 203 changed files with 12,798 additions and 12,068 deletions.
334 changes: 177 additions & 157 deletions clearcode/cdutils.py

Large diffs are not rendered by default.

59 changes: 27 additions & 32 deletions clearcode/load.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) nexB Inc. and others. All rights reserved.
#
Expand All @@ -16,16 +15,13 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import multiprocessing
import os
from pathlib import Path
import sys

from django.db.utils import IntegrityError

import click


"""
Load ClearlyDefined definitions and harvests from the filesystem
Expand All @@ -52,30 +48,32 @@ def walk_and_load_from_filesystem(input_dir, cd_root_dir):
CDitem.path = npm/npmjs/@actions/github/revision/2.1.1.json.gz
CDitem.content = 'the file: 2.1.1.json.gz in bytes'
"""

# for now, we count dirs too
file_counter = 1
for root, dirs, files in os.walk(input_dir):
for filename in files:
# output some progress
print(' ', end='\r')
print("Processing file #{}".format(file_counter), end='\r')
file_counter +=1
print(" ", end="\r")
print(f"Processing file #{file_counter}", end="\r")
file_counter += 1

# TODO: check if the location is actually a CD data item.
full_gzip_path = os.path.join(root, filename)
full_json_path = full_gzip_path.rstrip('.gz')
full_json_path = full_gzip_path.rstrip(".gz")

# normalize the `path` value by removing the arbitrary parent directory
cditem_rel_path = os.path.relpath(full_json_path, cd_root_dir)

with open(full_gzip_path, mode='rb') as f:
with open(full_gzip_path, mode="rb") as f:
content = f.read()

from clearcode import models

# Save to DB
try:
cditem = models.CDitem.objects.create(path=cditem_rel_path, content=content)
cditem = models.CDitem.objects.create(
path=cditem_rel_path, content=content
)
except IntegrityError:
# skip if we already have it in the DB
continue
Expand All @@ -87,41 +85,38 @@ def load(input_dir=None, cd_root_dir=None, *arg, **kwargs):
creating CDItem objects and loading them into a PostgreSQL database.
"""
if not input_dir:
sys.exit('Please specify an input directory using the `--input-dir` option.')
sys.exit("Please specify an input directory using the `--input-dir` option.")
if not cd_root_dir:
sys.exit('Please specify the cd-root-directory using the --cd-root-dir option.')
sys.exit("Please specify the cd-root-directory using the --cd-root-dir option.")

# get proper DB setup

walk_and_load_from_filesystem(input_dir, cd_root_dir)
print(' ', end='\r')
print(" ", end="\r")
print("Loading complete")


# Command-line entry point. click parses the options declared below and passes
# them to `cli` as keyword arguments; the function docstring is also the text
# click prints for `--help`, so it is kept verbatim.
@click.command()
@click.option(
    "--input-dir",
    type=click.Path(),
    metavar="DIR",
    help="Load content from this input directory that contains a tree of gzip-compressed JSON CD files",
)
@click.option(
    "--cd-root-dir",
    type=click.Path(),
    metavar="DIR",
    help="specify root directory that contains a tree of gzip-compressed JSON CD files",
)
@click.help_option("-h", "--help")
def cli(input_dir=None, cd_root_dir=None, *arg, **kwargs):
    """
    Handle ClearlyDefined gzipped JSON scans by walking a clearsync directory structure,
    creating CDItem objects and loading them into a PostgreSQL database.
    """
    # All validation lives in load(): it exits with a message when either
    # directory option is missing, so nothing is checked here.
    load(input_dir=input_dir, cd_root_dir=cd_root_dir, *arg, **kwargs)


# Run the click command only when this file is executed as a script.
if __name__ == "__main__":
    cli()
23 changes: 11 additions & 12 deletions clearcode/management/commands/clearload.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,23 +19,22 @@ class Command(VerboseCommand):

def add_arguments(self, parser):
    """
    Register the options of the load command on ``parser``.

    Both options are optional strings that default to None:
    ``--input-dir`` is stored as ``input_dir`` and ``--cd-root-dir`` as
    ``cd_root_dir``.
    """
    parser.add_argument(
        "--input-dir",
        dest="input_dir",
        default=None,
        type=str,
        help="Load content from this input directory that contains a tree of gzip-compressed JSON CD files",
    )
    parser.add_argument(
        "--cd-root-dir",
        dest="cd_root_dir",
        default=None,
        type=str,
        help="Specify root directory that contains a tree of gzip-compressed JSON CD files",
    )

def handle(self, *args, **options):
    """
    Run the command: read the two directory options and pass them to load().

    ``options`` is the mapping of parsed arguments. ``input_dir`` and
    ``cd_root_dir`` are looked up with ``.get`` and are therefore None when
    the key is absent.
    """
    load(
        input_dir=options.get("input_dir"),
        cd_root_dir=options.get("cd_root_dir"),
    )
110 changes: 60 additions & 50 deletions clearcode/management/commands/clearsync.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,76 +20,86 @@ class Command(VerboseCommand):

def add_arguments(self, parser):
    """
    Register the options of the sync command on ``parser``.

    Flags (``--save-to-db``, ``--unsorted``, ``--only-definitions``,
    ``--verbose``) are ``store_true`` switches. The remaining options take a
    value and fall back to the defaults given below.
    """
    parser.add_argument(
        "--output-dir",
        dest="output_dir",
        default=None,
        type=str,
        help="Save fetched content as compressed gzipped files to this output directory.",
    )
    parser.add_argument(
        "--save-to-db",
        dest="save_to_db",
        action="store_true",
        help="Save fetched content as compressed gzipped blobs in the configured database.",
    )
    parser.add_argument(
        "--unsorted",
        dest="unsorted",
        action="store_true",
        help="Fetch data without any sorting. The default is to fetch data sorting by latest updated first.",
    )
    parser.add_argument(
        "--base-api-url",
        dest="base_api_url",
        default="https://api.clearlydefined.io",
        help="ClearlyDefined base API URL.",
    )
    parser.add_argument(
        "--wait",
        dest="wait",
        default=60,
        type=int,
        help="Set the number of seconds to wait for new or updated definitions "
        "between two loops.",
    )
    parser.add_argument(
        "-n",
        "--processes",
        dest="processes",
        default=1,
        type=int,
        help="Set the number of parallel processes to use. "
        "Disable parallel processing if 0.",
    )
    parser.add_argument(
        "--max-def",
        dest="max_def",
        default=0,
        type=int,
        help="Set the maximum number of definitions to fetch.",
    )
    parser.add_argument(
        "--only-definitions",
        dest="only_definitions",
        action="store_true",
        help="Only fetch definitions and no other data item.",
    )
    parser.add_argument(
        "--log-file",
        dest="log_file",
        default=None,
        type=str,
        help="Path to a file where to log fetched paths, one per line. "
        "Log entries will be appended to this file if it exists.",
    )
    parser.add_argument(
        "--verbose",
        dest="verbose",
        action="store_true",
        help="Display more verbose progress messages.",
    )

def handle(self, *args, **options):
output_dir = options.get('output_dir')
save_to_db = options.get('save_to_db')
base_api_url = options.get('base_api_url')
wait = options.get('wait')
processes = options.get('processes')
unsorted = options.get('unsorted')
log_file = options.get('log_file')
max_def = options.get('max_def')
only_definitions = options.get('only_definitions')
verbose = options.get('verbose')
output_dir = options.get("output_dir")
save_to_db = options.get("save_to_db")
base_api_url = options.get("base_api_url")
wait = options.get("wait")
processes = options.get("processes")
unsorted = options.get("unsorted")
log_file = options.get("log_file")
max_def = options.get("max_def")
only_definitions = options.get("only_definitions")
verbose = options.get("verbose")

sync(
output_dir=output_dir,
Expand All @@ -101,5 +111,5 @@ def handle(self, *args, **options):
log_file=log_file,
max_def=max_def,
only_definitions=only_definitions,
verbose=verbose
verbose=verbose,
)
13 changes: 8 additions & 5 deletions clearcode/management/commands/store_scans.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,16 @@


class Command(VerboseCommand):
    # Short description of the command (kept verbatim).
    help = "Store scancode scans in git repositories"

    def add_arguments(self, parser):
        """
        Declare the command arguments: a required positional ``work_dir`` and
        the optional ``--github_org`` (default "") and ``--count`` (default 0).
        """
        parser.add_argument("work_dir", type=str)
        parser.add_argument("--github_org", type=str, default="")
        parser.add_argument("--count", type=int, default=0)

    def handle(self, *args, **options):
        """
        Forward the parsed ``work_dir``, ``github_org`` and ``count`` options
        to store_scancode_scans_from_cd_items().
        """
        store_scancode_scans_from_cd_items(
            work_dir=options["work_dir"],
            github_org=options["github_org"],
            count=options["count"],
        )
Loading

0 comments on commit 57f9cc0

Please sign in to comment.