Simple regex-based lexer/parser for inline markup
Project description
ReParser
========
Simple regex-based lexer/parser for inline markup
Requirements
------------
- Python 3
Usage
-----
Example:
```
import re
from pprint import pprint
from reparser import Parser, Token, MatchGroup
boundary_chars = r'\s`!()\[\]{{}};:\'".,<>?«»“”‘’'
b_left = r'(?:(?<=[' + boundary_chars + r'])|(?<=^))' # Lookbehind
b_right = r'(?:(?=[' + boundary_chars + r'])|(?=$))' # Lookahead
markdown = b_left + r'(?P<start>{tag})(?P<text>\S.+?\S)(?P<end>{tag})' + b_right
markdown_link = r'(?P<start>\[)(?P<text>.+?)\]\((?P<url>.+?)(?P<end>\))'
newline = r'(?P<text>\n|\r\n)'
url_proto_re = re.compile(r'(?i)^[a-z][\w-]+:/{1,3}')
url_complete = lambda u: u if url_proto_re.search(u) else 'http://' + u
tokens = [
Token(markdown.format(tag=r'\*\*\*'), is_bold=True, is_italic=True),
Token(markdown.format(tag=r'___'), is_bold=True, is_italic=True),
Token(markdown.format(tag=r'\*\*'), is_bold=True),
Token(markdown.format(tag=r'__'), is_bold=True),
Token(markdown.format(tag=r'\*'), is_italic=True),
Token(markdown.format(tag=r'_'), is_italic=True),
Token(markdown.format(tag=r'~~'), is_strikethrough=True),
Token(markdown.format(tag=r'=='), is_underline=True),
Token(markdown_link, link_target=MatchGroup('url', func=url_complete)),
Token(newline, segment_type='LINE_BREAK')
]
parser = Parser(tokens)
text = ('Hello **bold** world!\n'
'You can **try *this* awesome** [link](www.eff.org).')
segments = parser.parse(text)
pprint([(segment.text, segment.params) for segment in segments])
```
Output:
```
[('Hello ', {}),
('bold', {'is_bold': True}),
(' world!', {}),
('\n', {'segment_type': 'LINE_BREAK'}),
('You can ', {}),
('try ', {'is_bold': True}),
('this', {'is_bold': True, 'is_italic': True}),
(' awesome', {'is_bold': True}),
(' ', {}),
('link', {'link_target': 'http://www.eff.org'}),
('.', {})]
```
========
Simple regex-based lexer/parser for inline markup
Requirements
------------
- Python 3
Usage
-----
Example:
```
import re
from pprint import pprint
from reparser import Parser, Token, MatchGroup
boundary_chars = r'\s`!()\[\]{{}};:\'".,<>?«»“”‘’'
b_left = r'(?:(?<=[' + boundary_chars + r'])|(?<=^))' # Lookbehind
b_right = r'(?:(?=[' + boundary_chars + r'])|(?=$))' # Lookahead
markdown = b_left + r'(?P<start>{tag})(?P<text>\S.+?\S)(?P<end>{tag})' + b_right
markdown_link = r'(?P<start>\[)(?P<text>.+?)\]\((?P<url>.+?)(?P<end>\))'
newline = r'(?P<text>\n|\r\n)'
url_proto_re = re.compile(r'(?i)^[a-z][\w-]+:/{1,3}')
url_complete = lambda u: u if url_proto_re.search(u) else 'http://' + u
tokens = [
Token(markdown.format(tag=r'\*\*\*'), is_bold=True, is_italic=True),
Token(markdown.format(tag=r'___'), is_bold=True, is_italic=True),
Token(markdown.format(tag=r'\*\*'), is_bold=True),
Token(markdown.format(tag=r'__'), is_bold=True),
Token(markdown.format(tag=r'\*'), is_italic=True),
Token(markdown.format(tag=r'_'), is_italic=True),
Token(markdown.format(tag=r'~~'), is_strikethrough=True),
Token(markdown.format(tag=r'=='), is_underline=True),
Token(markdown_link, link_target=MatchGroup('url', func=url_complete)),
Token(newline, segment_type='LINE_BREAK')
]
parser = Parser(tokens)
text = ('Hello **bold** world!\n'
'You can **try *this* awesome** [link](www.eff.org).')
segments = parser.parse(text)
pprint([(segment.text, segment.params) for segment in segments])
```
Output:
```
[('Hello ', {}),
('bold', {'is_bold': True}),
(' world!', {}),
('\n', {'segment_type': 'LINE_BREAK'}),
('You can ', {}),
('try ', {'is_bold': True}),
('this', {'is_bold': True, 'is_italic': True}),
(' awesome', {'is_bold': True}),
(' ', {}),
('link', {'link_target': 'http://www.eff.org'}),
('.', {})]
```
Project details
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
ReParser-1.1.tar.gz
(3.2 kB
view hashes)