From 0f8af8ecb4d09abb740ab8e11f3482c0b2e60ad4 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Thu, 21 Sep 2023 14:03:06 -0400 Subject: [PATCH 1/9] Current version from chatgpt --- scripts/produce_md_table_of_files.py | 57 ++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100755 scripts/produce_md_table_of_files.py diff --git a/scripts/produce_md_table_of_files.py b/scripts/produce_md_table_of_files.py new file mode 100755 index 000000000..38b429d93 --- /dev/null +++ b/scripts/produce_md_table_of_files.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python3 + +import argparse +import subprocess +import os +from operator import itemgetter + +def git_first_commit_date(filename): + return subprocess.getoutput(f"git log --follow --format=%ai -- {filename} | tail -n 1") + +def git_describe_contains(commit_hash): + return subprocess.getoutput(f"git describe --contains {commit_hash}") + +def git_first_commit_hash(filename): + return subprocess.getoutput(f"git log --pretty=format:%H -- {filename} | tail -n 1") + +def main(): + parser = argparse.ArgumentParser(description="Generate a Markdown table for files in a Git repo.") + parser.add_argument("--header", type=str, required=True, help="Header for the Markdown file.") + parser.add_argument("-o", "--output", type=str, help="Output Markdown file name.") + parser.add_argument("files", nargs="+", help="Files to include in the table.") + args = parser.parse_args() + + output_lines = [] + output_lines.append(args.header + "\n") + output_lines.append("| File | Date | Version | Implemented State | Date | Version |") + output_lines.append("| ---- | ---- | ------- | ----------------- | ---- | ------- |") + + data = [] + + for filename in args.files: + if "-" in filename and filename.endswith(".md"): + date = git_first_commit_date(filename) + commit_hash = git_first_commit_hash(filename) + version = git_describe_contains(commit_hash) + data.append({ + "filename": filename, + "date": date, + "version": version + }) + + sorted_data = sorted(data, key=itemgetter('date')) + + for entry in sorted_data: + filename = entry["filename"] + date = entry["date"] + version = entry["version"] + output_lines.append(f"| [{filename}](./{filename}) | {date} | {version} | | | |") + + if args.output: + with open(args.output, "w") as f: + f.write("\n".join(output_lines) + "\n") + else: + print("\n".join(output_lines)) + +if __name__ == "__main__": + main() From 5bad7f3a3ebe0e5637fd3d037566009529314776 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Thu, 21 Sep 2023 14:57:21 -0400 Subject: [PATCH 2/9] next version: formatting of columns etc --- scripts/produce_md_table_of_files.py | 39 ++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/scripts/produce_md_table_of_files.py b/scripts/produce_md_table_of_files.py index 38b429d93..f5329f0e5 100755 --- a/scripts/produce_md_table_of_files.py +++ b/scripts/produce_md_table_of_files.py @@ -9,7 +9,13 @@ def git_first_commit_date(filename): return subprocess.getoutput(f"git log --follow --format=%ai -- {filename} | tail -n 1") def git_describe_contains(commit_hash): - return subprocess.getoutput(f"git describe --contains {commit_hash}") + result = subprocess.run( + ["git", "describe", "--contains", commit_hash], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True + ) + return result.stdout.strip() if result.returncode == 0 else "" def git_first_commit_hash(filename): return subprocess.getoutput(f"git log --pretty=format:%H -- {filename} | tail -n 1") @@ -22,9 +28,10 @@ def main(): args = parser.parse_args() output_lines = [] - output_lines.append(args.header + "\n") - output_lines.append("| File | Date | Version | Implemented State | Date | Version |") - output_lines.append("| ---- | ---- | ------- | ----------------- | ---- | ------- |") + output_lines.append(args.header) + + headers = ["File", "Date", "Version", "Implemented State", "Date", "Version", "Superseded by"] + col_widths = [len(h) for h in headers] data = [] @@ -33,19 +40,35 @@ def main(): date = git_first_commit_date(filename) commit_hash = git_first_commit_hash(filename) version = git_describe_contains(commit_hash) + + col_widths[0] = max(col_widths[0], len(filename)) + col_widths[1] = max(col_widths[1], len(date)) + col_widths[2] = max(col_widths[2], len(version)) + data.append({ "filename": filename, "date": date, "version": version }) + + col_widths[-3] = col_widths[1] + col_widths[-2] = col_widths[2] sorted_data = sorted(data, key=itemgetter('date')) + header_line = "| " + " | ".join([h.ljust(w) for h, w in zip(headers, col_widths)]) + " |" + separator_line = "| " + " | ".join(["-" * w for w in col_widths]) + " |" + + output_lines.append(header_line) + output_lines.append(separator_line) + for entry in sorted_data: - filename = entry["filename"] - date = entry["date"] - version = entry["version"] - output_lines.append(f"| [{filename}](./{filename}) | {date} | {version} | | | |") + filename = entry["filename"].ljust(col_widths[0]) + date = entry["date"].ljust(col_widths[1]) + version = entry["version"].ljust(col_widths[2]) + empty_cols = [" " * w for w in col_widths[3:]] + row_line = "| " + " | ".join([filename, date, version] + empty_cols) + " |" + output_lines.append(row_line) if args.output: with open(args.output, "w") as f: From ff35a01f94a1f6d4bd0e18eb751f89a0aaada626 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Thu, 21 Sep 2023 14:57:29 -0400 Subject: [PATCH 3/9] Tune up to header etc --- scripts/produce_md_table_of_files.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/produce_md_table_of_files.py b/scripts/produce_md_table_of_files.py index f5329f0e5..c7241873b 100755 --- a/scripts/produce_md_table_of_files.py +++ b/scripts/produce_md_table_of_files.py @@ -22,15 +22,15 @@ def git_first_commit_hash(filename): def main(): parser = argparse.ArgumentParser(description="Generate a Markdown table for files in a Git repo.") - parser.add_argument("--header", type=str, required=True, help="Header for the Markdown file.") + parser.add_argument("--header", type=str, help="Header for the Markdown file.") parser.add_argument("-o", "--output", type=str, help="Output Markdown file name.") parser.add_argument("files", nargs="+", help="Files to include in the table.") args = parser.parse_args() output_lines = [] - output_lines.append(args.header) + output_lines.extend(args.header.split(r'\n')) - headers = ["File", "Date", "Version", "Implemented State", "Date", "Version", "Superseded by"] + headers = ["File", "Originating Date", "Version", "Implementation State", "Date", "Version", "Superseded by"] col_widths = [len(h) for h in headers] data = [] From e3755be482d69566b2b37afb2b3c5d5670a0dfc4 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Thu, 21 Sep 2023 15:04:52 -0400 Subject: [PATCH 4/9] ENH: include just the date --- scripts/produce_md_table_of_files.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/scripts/produce_md_table_of_files.py b/scripts/produce_md_table_of_files.py index c7241873b..31a450fc3 100755 --- a/scripts/produce_md_table_of_files.py +++ b/scripts/produce_md_table_of_files.py @@ -2,11 +2,11 @@ import argparse import subprocess -import os from operator import itemgetter def git_first_commit_date(filename): - return subprocess.getoutput(f"git log --follow --format=%ai -- {filename} | tail -n 1") + date_str = subprocess.getoutput(f"git log --follow --format=%ai -- {filename} | tail -n 1") + return date_str.split()[0] # Return only the date portion, ignoring time and timezone def git_describe_contains(commit_hash): result = subprocess.run( @@ -28,7 +28,8 @@ def main(): args = parser.parse_args() output_lines = [] - output_lines.extend(args.header.split(r'\n')) + if args.header: + output_lines.extend(args.header.split(r'\n')) headers = ["File", "Originating Date", "Version", "Implementation State", "Date", "Version", "Superseded by"] col_widths = [len(h) for h in headers] From ae11c37d83d97e5d407c85db3d6d83f0cfc4c450 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Thu, 21 Sep 2023 15:05:50 -0400 Subject: [PATCH 5/9] RF: do it on all provided files, do not restrict name to have - --- scripts/produce_md_table_of_files.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/scripts/produce_md_table_of_files.py b/scripts/produce_md_table_of_files.py index 31a450fc3..a7d32f8d5 100755 --- a/scripts/produce_md_table_of_files.py +++ b/scripts/produce_md_table_of_files.py @@ -37,20 +37,19 @@ def main(): data = [] for filename in args.files: - if "-" in filename and filename.endswith(".md"): - date = git_first_commit_date(filename) - commit_hash = git_first_commit_hash(filename) - version = git_describe_contains(commit_hash) - - col_widths[0] = max(col_widths[0], len(filename)) - col_widths[1] = max(col_widths[1], len(date)) - col_widths[2] = max(col_widths[2], len(version)) + date = git_first_commit_date(filename) + commit_hash = git_first_commit_hash(filename) + version = git_describe_contains(commit_hash) + + col_widths[0] = max(col_widths[0], len(filename)) + col_widths[1] = max(col_widths[1], len(date)) + col_widths[2] = max(col_widths[2], len(version)) - data.append({ - "filename": filename, - "date": date, - "version": version - }) + data.append({ + "filename": filename, + "date": date, + "version": version + }) col_widths[-3] = col_widths[1] col_widths[-2] = col_widths[2] From e3c02283b1c29a609c9397dfe00f791de73ee9d8 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Thu, 21 Sep 2023 15:05:55 -0400 Subject: [PATCH 6/9] [DATALAD RUNCMD] Produce initial table of the design documents === Do not change lines below === { "chain": [], "cmd": "../../scripts/produce_md_table_of_files.py -o index.md --header \"# Design documents\\n\\n\" *-*.md", "exit": 0, "extra_inputs": [], "inputs": [], "outputs": [], "pwd": "doc/design" } ^^^ Do not change lines above ^^^ --- doc/design/index.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 doc/design/index.md diff --git a/doc/design/index.md b/doc/design/index.md new file mode 100644 index 000000000..001692e75 --- /dev/null +++ b/doc/design/index.md @@ -0,0 +1,20 @@ +# Design documents + + +| File | Originating Date | Version | Implementation State | Date | Version | Superseded by | +| ---------------------------- | ---------------- | -------------- | -------------------- | ---------------- | -------------- | ------------- | +| blob-uuid-1.md | 2021-03-09 | v0.0.1~15^2~29 | | | | | +| doi-generation-1.md | 2021-04-05 | v0.1.0~22^2~14 | | | | | +| publish-1.md | 2021-05-20 | v0.1.0~21^2~6 | | | | | +| staging-1.md | 2021-06-08 | v0.1.0~15^2~3 | | | | | +| draft-metadata-migration.md | 2021-08-12 | v0.1.28~8^2~7 | | | | | +| new-user-questionnaire.md | 2021-08-12 | v0.1.21~1^2~7 | | | | | +| garbage-collection-1.md | 2021-10-11 | v0.2.18~4^2~4 | | | | | +| zarr-support-3.md | 2021-10-20 | v0.1.36~5^2~7 | | | | | +| embargo-full.md | 2021-12-07 | v0.1.37~4^2~20 | | | | | +| embargo-mvp.md | 2021-12-07 | v0.1.37~4^2~20 | | | | | +| apex-domain-netlify.md | 2022-04-08 | v0.2.8~2^2 | | | | | +| deployment-1.md | 2022-04-13 | v0.2.9~2^2~4 | | | | | +| asset-paths-1.md | 2022-09-06 | v0.3.0~11^2~27 | | | | | +| zarr-performance-redesign.md | 2022-12-19 | v0.3.11~7^2~5 | | | | | +| s3-trailing-delete.md | 2023-08-28 | | | | | | From 753b5d1487049e75934331adbbfc35a262d9be2f Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 13 Oct 2023 11:52:25 -0400 Subject: [PATCH 7/9] [DATALAD RUNCMD] Apply formatting using black etc === Do not change lines below === { "chain": [], "cmd": "tox -e format -- scripts/produce_md_table_of_files.py", "exit": 0, "extra_inputs": [], "inputs": [], "outputs": [], "pwd": "." } ^^^ Do not change lines above ^^^ --- scripts/produce_md_table_of_files.py | 37 ++++++++++++++++++---------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/scripts/produce_md_table_of_files.py b/scripts/produce_md_table_of_files.py index a7d32f8d5..4b94786a2 100755 --- a/scripts/produce_md_table_of_files.py +++ b/scripts/produce_md_table_of_files.py @@ -1,27 +1,33 @@ #!/usr/bin/env python3 import argparse -import subprocess from operator import itemgetter +import subprocess + def git_first_commit_date(filename): date_str = subprocess.getoutput(f"git log --follow --format=%ai -- {filename} | tail -n 1") return date_str.split()[0] # Return only the date portion, ignoring time and timezone + def git_describe_contains(commit_hash): result = subprocess.run( ["git", "describe", "--contains", commit_hash], stdout=subprocess.PIPE, stderr=subprocess.PIPE, - text=True + text=True, ) return result.stdout.strip() if result.returncode == 0 else "" + def git_first_commit_hash(filename): return subprocess.getoutput(f"git log --pretty=format:%H -- {filename} | tail -n 1") + def main(): - parser = argparse.ArgumentParser(description="Generate a Markdown table for files in a Git repo.") + parser = argparse.ArgumentParser( + description="Generate a Markdown table for files in a Git repo." + ) parser.add_argument("--header", type=str, help="Header for the Markdown file.") parser.add_argument("-o", "--output", type=str, help="Output Markdown file name.") parser.add_argument("files", nargs="+", help="Files to include in the table.") @@ -30,30 +36,34 @@ def main(): output_lines = [] if args.header: output_lines.extend(args.header.split(r'\n')) - - headers = ["File", "Originating Date", "Version", "Implementation State", "Date", "Version", "Superseded by"] + + headers = [ + "File", + "Originating Date", + "Version", + "Implementation State", + "Date", + "Version", + "Superseded by", + ] col_widths = [len(h) for h in headers] data = [] - + for filename in args.files: date = git_first_commit_date(filename) commit_hash = git_first_commit_hash(filename) version = git_describe_contains(commit_hash) - + col_widths[0] = max(col_widths[0], len(filename)) col_widths[1] = max(col_widths[1], len(date)) col_widths[2] = max(col_widths[2], len(version)) - data.append({ - "filename": filename, - "date": date, - "version": version - }) + data.append({"filename": filename, "date": date, "version": version}) col_widths[-3] = col_widths[1] col_widths[-2] = col_widths[2] - + sorted_data = sorted(data, key=itemgetter('date')) header_line = "| " + " | ".join([h.ljust(w) for h, w in zip(headers, col_widths)]) + " |" @@ -76,5 +86,6 @@ def main(): else: print("\n".join(output_lines)) + if __name__ == "__main__": main() From 7276269a35ac053bdaf7ff4e45cded9e077fe520 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 13 Oct 2023 11:53:31 -0400 Subject: [PATCH 8/9] [DATALAD RUNCMD] Replace all " with ' to please flake8 === Do not change lines below === { "chain": [], "cmd": "sed -e 's,\",'\"'\"',g' -i scripts/produce_md_table_of_files.py", "exit": 0, "extra_inputs": [], "inputs": [], "outputs": [], "pwd": "." } ^^^ Do not change lines above ^^^ --- scripts/produce_md_table_of_files.py | 54 ++++++++++++++-------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/scripts/produce_md_table_of_files.py b/scripts/produce_md_table_of_files.py index 4b94786a2..8b38a91ba 100755 --- a/scripts/produce_md_table_of_files.py +++ b/scripts/produce_md_table_of_files.py @@ -6,31 +6,31 @@ def git_first_commit_date(filename): - date_str = subprocess.getoutput(f"git log --follow --format=%ai -- {filename} | tail -n 1") + date_str = subprocess.getoutput(f'git log --follow --format=%ai -- {filename} | tail -n 1') return date_str.split()[0] # Return only the date portion, ignoring time and timezone def git_describe_contains(commit_hash): result = subprocess.run( - ["git", "describe", "--contains", commit_hash], + ['git', 'describe', '--contains', commit_hash], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, ) - return result.stdout.strip() if result.returncode == 0 else "" + return result.stdout.strip() if result.returncode == 0 else '' def git_first_commit_hash(filename): - return subprocess.getoutput(f"git log --pretty=format:%H -- {filename} | tail -n 1") + return subprocess.getoutput(f'git log --pretty=format:%H -- {filename} | tail -n 1') def main(): parser = argparse.ArgumentParser( - description="Generate a Markdown table for files in a Git repo." + description='Generate a Markdown table for files in a Git repo.' ) - parser.add_argument("--header", type=str, help="Header for the Markdown file.") - parser.add_argument("-o", "--output", type=str, help="Output Markdown file name.") - parser.add_argument("files", nargs="+", help="Files to include in the table.") + parser.add_argument('--header', type=str, help='Header for the Markdown file.') + parser.add_argument('-o', '--output', type=str, help='Output Markdown file name.') + parser.add_argument('files', nargs='+', help='Files to include in the table.') args = parser.parse_args() output_lines = [] @@ -38,13 +38,13 @@ def main(): output_lines.extend(args.header.split(r'\n')) headers = [ - "File", - "Originating Date", - "Version", - "Implementation State", - "Date", - "Version", - "Superseded by", + 'File', + 'Originating Date', + 'Version', + 'Implementation State', + 'Date', + 'Version', + 'Superseded by', ] col_widths = [len(h) for h in headers] @@ -59,33 +59,33 @@ def main(): col_widths[1] = max(col_widths[1], len(date)) col_widths[2] = max(col_widths[2], len(version)) - data.append({"filename": filename, "date": date, "version": version}) + data.append({'filename': filename, 'date': date, 'version': version}) col_widths[-3] = col_widths[1] col_widths[-2] = col_widths[2] sorted_data = sorted(data, key=itemgetter('date')) - header_line = "| " + " | ".join([h.ljust(w) for h, w in zip(headers, col_widths)]) + " |" - separator_line = "| " + " | ".join(["-" * w for w in col_widths]) + " |" + header_line = '| ' + ' | '.join([h.ljust(w) for h, w in zip(headers, col_widths)]) + ' |' + separator_line = '| ' + ' | '.join(['-' * w for w in col_widths]) + ' |' output_lines.append(header_line) output_lines.append(separator_line) for entry in sorted_data: - filename = entry["filename"].ljust(col_widths[0]) - date = entry["date"].ljust(col_widths[1]) - version = entry["version"].ljust(col_widths[2]) - empty_cols = [" " * w for w in col_widths[3:]] - row_line = "| " + " | ".join([filename, date, version] + empty_cols) + " |" + filename = entry['filename'].ljust(col_widths[0]) + date = entry['date'].ljust(col_widths[1]) + version = entry['version'].ljust(col_widths[2]) + empty_cols = [' ' * w for w in col_widths[3:]] + row_line = '| ' + ' | '.join([filename, date, version] + empty_cols) + ' |' output_lines.append(row_line) if args.output: - with open(args.output, "w") as f: - f.write("\n".join(output_lines) + "\n") + with open(args.output, 'w') as f: + f.write('\n'.join(output_lines) + '\n') else: - print("\n".join(output_lines)) + print('\n'.join(output_lines)) -if __name__ == "__main__": +if __name__ == '__main__': main() From 718ed38b926436ae1a40141a6196f57197274d94 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 13 Oct 2023 11:54:00 -0400 Subject: [PATCH 9/9] [DATALAD RUNCMD] Produce initial table of the design documents === Do not change lines below === { "chain": [ "e3c02283b1c29a609c9397dfe00f791de73ee9d8" ], "cmd": "../../scripts/produce_md_table_of_files.py -o index.md --header \"# Design documents\\n\\n\" *-*.md", "exit": 0, "extra_inputs": [], "inputs": [], "outputs": [], "pwd": "doc/design" } ^^^ Do not change lines above ^^^ --- doc/design/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/design/index.md b/doc/design/index.md index 001692e75..b35f348d4 100644 --- a/doc/design/index.md +++ b/doc/design/index.md @@ -17,4 +17,4 @@ | deployment-1.md | 2022-04-13 | v0.2.9~2^2~4 | | | | | | asset-paths-1.md | 2022-09-06 | v0.3.0~11^2~27 | | | | | | zarr-performance-redesign.md | 2022-12-19 | v0.3.11~7^2~5 | | | | | -| s3-trailing-delete.md | 2023-08-28 | | | | | | +| s3-trailing-delete.md | 2023-08-28 | v0.3.52~6^2~3 | | | | |