Skip to content

Commit

Permalink
More languages (#13)
Browse files Browse the repository at this point in the history
* simplify file write and make_directories

simple file writing is simpler with pathlib.Path.write_text

For make_directories, python2 is end of life. python3 doesn't require
blank __init__.py files, so we can rely on pathlib.Path.mkdir

* default to adding __init__.py files to directories

Python unittest discover still relies on __init__.py files and doesn't
support namespace packages. By default we can create these but allow for
their suppression if using other test frameworks.

* test that non markdown extension is ignored

* add per language test and data

* remove python indicator, switch to language

If mkcodes supports many language blocks, we can make the default
language python. safe mode should remove that default and not output
languages that aren't specified.

2 tests not passing

* introduce default language, lang mappings

rather than assume python, let's just recognize that's the default
language for unknown blocks. People can override that as needed if they
want to put in some other assumptions.

* collect and write per-language code blocks

as we pass through a document, get per-language codeblocks, then write
those out to the correct extension file. By default, everything is still
python - though it can be overridden

* make other languages work

Now the default language is python, and
codeblocks in other languages can be broken out into their own language
files. There is a way to add more languages with mappings, but a
language-fenced codeblock will default to using that language as the
output extension.

* add test for unmapped extension

* clean up __init__.py addition logic

Also remove some commented code, shorten comments

* test __init__.py paths

For non python paths, don't declare them to be python packages.

* correct spelling mistakes

also conform new test to self.call

* separate 3rd party imports with a blank line

* cleanup

* Ignore capitalization of language strings.

* Remove no-op else clause.

* Document extension/language mappings.

* Language should be reset even if --unsafe.

The safety flag refers to whether a default language is used. This would
have made it also accept the previous language used, which doesn't seem
right.

* Add warning when unhinted code blocks are skipped.

* Use dash for cli parameters and fix typo.

* Pull sanity check out of if-statement.

* Don't create __init__.py in base output directory.

* Avoid abusing return as a mid-function "break".

I would consider this an anti-pattern when avoidable, and I find it harder to
understand.

* Remove redundant test assertion.

Co-authored-by: Matt Katz <[email protected]>
Co-authored-by: ryneeverett <[email protected]>
  • Loading branch information
3 people authored Aug 12, 2020
1 parent c27b4b8 commit 16c8f9a
Show file tree
Hide file tree
Showing 8 changed files with 226 additions and 31 deletions.
108 changes: 78 additions & 30 deletions mkcodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,48 +13,82 @@
from markdown.extensions import Extension
from markdown.treeprocessors import Treeprocessor


def github_codeblocks(filepath, safe):
codeblocks = []
# There does not seem to be any specification for which info strings are
# accepted, but python-markdown passes it directly to pygments, so their
# mapping can be used as a guide:
# https://github.com/pygments/pygments/blob/master/pygments/lexers/_mapping.py
ext_map = {
'cs': ['c#', 'csharp', 'c-sharp'],
'py': ['python', 'python2', 'python3', 'py2', 'py3'],
}
# It's more straightforward to express the mappings by extension, but we
# actually need an inverted mapping.
language_map = {}
for ext, lang_strings in ext_map.items():
for lang_string in lang_strings:
language_map[lang_string] = ext


def github_codeblocks(filepath, safe, default_lang='py'):
codeblocks = {}
codeblock_re = r'^```.*'
codeblock_open_re = r'^```(`*)(py|python){0}$'.format('' if safe else '?')
codeblock_open_re = r'^```(`*)(\w+){0}$'.format('' if safe else '?')

with open(filepath, 'r') as f:
# Initialize State
block = []
python = True
language = None
in_codeblock = False

for line in f.readlines():
# does this line contain a codeblock begin or end?
codeblock_delimiter = re.match(codeblock_re, line)

if in_codeblock:
if codeblock_delimiter:
if python:
codeblocks.append(''.join(block))
# we are closing a codeblock
if language:
# finished a codeblock, append everything
ext = language_map.get(language, language)
codeblocks.setdefault(ext, []).append(''.join(block))
else:
warnings.warn('No language hint found in safe mode. ' +
'Skipping block beginning with: ' +
block[0])

# Reset State
block = []
python = True
language = None
in_codeblock = False
else:
block.append(line)
elif codeblock_delimiter:
# beginning a codeblock
in_codeblock = True
if not re.match(codeblock_open_re, line):
python = False
# does it have a language?
lang_match = re.match(codeblock_open_re, line)
if lang_match:
language = lang_match.group(2)
language = language.lower() if language else language
if not safe:
# we can sub a default language if not safe
language = language or default_lang
return codeblocks


def markdown_codeblocks(filepath, safe):
def markdown_codeblocks(filepath, safe, default_lang='py'):
import markdown

codeblocks = []
codeblocks = {}

if safe:
warnings.warn("'safe' option not available in 'markdown' mode.")

class DoctestCollector(Treeprocessor):
def run(self, root):
nonlocal codeblocks
codeblocks = (block.text for block in root.iterfind('./pre/code'))
codeblocks[default_lang] = (
block.text for block in root.iterfind('./pre/code'))

class DoctestExtension(Extension):
def extendMarkdown(self, md, md_globals):
Expand All @@ -63,7 +97,7 @@ def extendMarkdown(self, md, md_globals):

doctestextension = DoctestExtension()
markdowner = markdown.Markdown(extensions=[doctestextension])
markdowner.convertFile(str(filepath), output=os.devnull)
markdowner.convertFile(input=str(filepath), output=os.devnull)
return codeblocks


Expand All @@ -79,43 +113,57 @@ def get_files(inputs):
elif path.suffix in markdown_extensions:
yield path, path.parent

def add_inits_to_dir(path):

def add_inits_along_path(from_path, to_path):
"""Recursively add __init__.py files to a directory
This compensates for https://bugs.python.org/issue23882 and https://bugs.python.org/issue35617
This compensates for https://bugs.python.org/issue23882
and https://bugs.python.org/issue35617
"""
for child in path.rglob('*'):
if child.is_dir():
(child / '__init__.py').touch()
to_path = to_path.expanduser().resolve()
from_path = from_path.expanduser().resolve()

# Sanity Check: This will raise an exception if paths aren't relative.
to_path.relative_to(from_path)

# Continue recursing if we haven't reached the base output directory.
if to_path != from_path:
(to_path / '__init__.py').touch()
add_inits_along_path(from_path, to_path.parent)


@click.command()
@click.argument(
'inputs', nargs=-1, required=True, type=click.Path(exists=True))
@click.option('--output', default='{name}.py')
@click.option('--output', default='{name}.{ext}')
@click.option('--github/--markdown', default=bool(not markdown_enabled),
help='Github-flavored fence blocks or pure markdown.')
@click.option('--safe/--unsafe', default=True,
help='Allow code blocks without language hints.')
@click.option('--package-python', default=True,
help='Add __init__.py files to python output to aid in test discovery')
def main(inputs, output, github, safe, package_python):
help='Add __init__.py files to python dirs for test discovery')
@click.option('--default-lang', default='py',
help='Assumed language for code blocks without language hints.')
def main(inputs, output, github, safe, package_python, default_lang):
collect_codeblocks = github_codeblocks if github else markdown_codeblocks
outputbasedir = Path(output).parent
outputbasename = Path(output).name

for filepath, input_path in get_files(inputs):
codeblocks = collect_codeblocks(filepath, safe)
codeblocks = collect_codeblocks(filepath, safe, default_lang)

if codeblocks:
fp = Path(filepath)
filedir = fp.parent.relative_to(input_path)
filename = fp.stem
outputfilename = outputbasedir / filedir / outputbasename.format(name=filename)

outputfilename.parent.mkdir(parents=True, exist_ok=True)
outputfilename.write_text('\n\n'.join(codeblocks))
if package_python:
add_inits_to_dir(outputbasedir)


# stitch together the OUTPUT base directory with input directories
# add the file format at the end.
for lang, blocks in codeblocks.items():
outputfilename = outputbasedir / filedir /\
outputbasename.format(name=filename, ext=lang)

# make sure path exists, don't care if it already does
outputfilename.parent.mkdir(parents=True, exist_ok=True)
outputfilename.write_text('\n\n'.join(blocks))
if package_python and lang == 'py':
add_inits_along_path(outputbasedir, outputfilename.parent)
2 changes: 1 addition & 1 deletion tests/data/nest/more/why.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# why?

We want to make sure that in more complext documentation structures, which may have multiple sub directories, we are still formatting name and paths correctly.
We want to make sure that in more complex documentation structures, which may have multiple sub directories, we are still formatting name and paths correctly.

```py
import unittest
Expand Down
34 changes: 34 additions & 0 deletions tests/langdata/csharp.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# dotNet is still a thing

What if you could provide a code sample here?

```cs
public void Sum(int a, int b)
{
return a + b;
}
```

And we know that it is testable.

```csharp
[Testclass]
public class UnitTest1
{
[TestMethod]
public void TestMethod1()
{
//Arrange
ApplicationToTest.Calc ClassCalc = new ApplicationToTest.Calc();
int expectedResult = 5;

//Act
int result = ClassCalc.Sum(2,3);

//Assert
Assert.AreEqual(expectedResult, result);
}
}
```

Actually checking and running these tests, that's a different matter.
32 changes: 32 additions & 0 deletions tests/langdata/java.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Java documentation is important

That's a language still. Here's a java codeblock:

```java
public class MyUnit {
public String concatenate(String one, String two){
return one + two;
}
}
```

And since we have that class, let's test it

```java
import org.junit.Test;
import static org.junit.Assert.*;

public class MyUnitTest {

@Test
public void testConcatenate() {
MyUnit myUnit = new MyUnit();

String result = myUnit.concatenate("one", "two");

assertEquals("onetwo", result);

}
}

```
34 changes: 34 additions & 0 deletions tests/langdata/multilang.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Comparing and contrasting

For some ideas about an api, we might give getting started code in a simple getting started page.

In a pinch, let's hello that world.

```py
print("hello, world")
```

But maybe we want this to be enterprise grade?

```java
class HelloWorld {
public static void main(String[] args) {
System.out.println("Hello, World!");
}
}
```

New orders from the CTO: let's use Azure cloud.
```cs
class HelloWorld {
static void Main() {
System.Console.WriteLine("Hello World");
}
}
```

We want to have a react vue jquery frontend. Assume that the code sample below has a testable extension as the language

```js
console.log('Hello, world");
```
16 changes: 16 additions & 0 deletions tests/langdata/no_py_tree/clean.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Cleanliness

If there are no python files in a directory, we don't need to add an __init__.py file to that directory. Sure, they don't hurt, but having them where they aren't needed isn't very tidy and might be confusing.

Speaking of confusing, let's test javascript
```js
function assert(condition, message) {
if (!condition) {
message = message || "Assertion failed";
throw new Error(message);
}
}

assert([]+[]=="", "very sensible, adding arrays is a string")
assert({}+[]==0, "of course adding a dict to an array is 0")
```
12 changes: 12 additions & 0 deletions tests/langdata/pytree/buried.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Test discovery

For test discovery to work for unittest, python files generated from this document must have an `__init__.py` file added to the directory - otherwise they won't be considered testable packages.

```python
import unittest

class TestDiscovery(unittest.TestCase):
def test_discovery(self):
self.assertTrue(True)

```
19 changes: 19 additions & 0 deletions tests/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,25 @@ def test_prefixed_deep_blocks(self):
self.assertIn('Ran 2 tests', proc.stderr)
self.assertIn('OK', proc.stderr)

def test_other_languages(self):
self.call(
'--output', 'tests/output/test_{name}.{ext}',
'--github', 'tests/langdata')
self.assertTrue(self._output_path_exists('test_java.java'))
self.assertTrue(self._output_path_exists('test_csharp.cs'))
self.assertFalse(self._output_path_exists('test_csharp.csharp'))
self.assertTrue(self._output_path_exists('test_multilang.cs'))
self.assertTrue(self._output_path_exists('test_multilang.java'))
self.assertTrue(self._output_path_exists('test_multilang.py'))
self.assertTrue(self._output_path_exists('test_multilang.js'))
self.assertTrue(self._output_path_exists('no_py_tree/test_clean.js'))
self.assertFalse(self._output_path_exists('no_py_tree/__init__.py'))
self.assertTrue(self._output_path_exists('pytree/test_buried.py'))
self.assertTrue(self._output_path_exists('pytree/__init__.py'))

# __init__.py should not be created in the base output directory.
self.assertFalse(self._output_path_exists('__init__.py'))

@unittest.skip
def test_glob(self):
raise NotImplementedError
Expand Down

0 comments on commit 16c8f9a

Please sign in to comment.