WM3O3HSYIYXII5QZEHMUJZ3YYIX52FVOCSFJOCR54VZ4BURLQBBAC
# Package metadata
version     = "0.1.0"
author      = "Barry Roberts"
description = "Read Utah firearm statutes (xml) from le.utah.gov and create an html document"
license     = "MIT"
srcDir      = "src"
bin         = @["utfastatutes_nim"]

# Nim-level dependencies
requires("nim >= 0.19")

# Foreign (non-Nim) dependency: the package is named "libssl-dev" on Ubuntu
# and "openssl" on every other detected distribution.
when defined(nimdistros):
  import distros
  foreignDep(if detectOs(Ubuntu): "libssl-dev" else: "openssl")
import httpclient, nre, options, json,
strutils, strformat, strtabs,
xmlparser, xmltree, times, os
const indentWidth = 3
  ## Left-margin step, in percent, added per heading/paragraph nesting level
  ## (used as `(indent - 1) * indentWidth` when building `margin-left`).
proc generateHtmlHeading(element: XmlNode, indent: int, htmlToc: var string, htmlBody: var string, faLinks: var StringTableRef) =
  ## Emits the heading for one statute element.
  ##
  ## Appends a table-of-contents link to `htmlToc`, an `<h{indent}>` heading
  ## to `htmlBody` (followed by an "(Effective ...)" paragraph when the element
  ## has an `effdate` child), and records the element's `number` attribute in
  ## `faLinks` as an anchor that exists in the generated document.
  ##
  ## * `element` - node whose `number` attribute and `catchline` child supply
  ##   the anchor id and the heading text.
  ## * `indent`  - heading level; the left margin is
  ##   `(indent - 1) * indentWidth` percent.
  let number = element.attr("number")
  let margin = $((indent - 1) * indentWidth)
  let level = $indent

  # `child` returns nil when there is no such child element. Look the
  # catchline up once and fall back to an empty title instead of passing nil
  # to `innerText`.
  let catchline = element.child("catchline")
  let title = if catchline.isNil: "" else: catchline.innerText

  htmlToc &= "<a style=\"margin-left: " & margin & "%;\" href=\"#_" & number &
    "\">" & number & " " & title & "</a><br/>\n"
  faLinks[number] = number
  htmlBody &= "<h" & level & " id=\"_" & number & "\">" & number & " " &
    title & "</h" & level & ">\n"

  let effdate = element.child("effdate")
  if not effdate.isNil:
    htmlBody &= "<p style=\"display: block; margin-left: " & margin &
      "%;\">(Effective " & effdate.innerText & ")</p>\n"
proc convertSubsectionToHtml(element: XmlNode, parentNumber: string, htmlToc: var string, htmlBody: var string, indent: int, faLinks: var StringTableRef) =
  ## Renders `element` as one HTML paragraph appended to `htmlBody`, recursing
  ## into nested `subsection` children with `indent + 1`.
  ##
  ## * `parentNumber` - number of the enclosing element; that many leading
  ##   characters are dropped from this element's number for display.
  ## * `faLinks` - receives this element's number as an available anchor, and
  ##   an empty entry for every `xref` target not registered yet.
  let fullNumber = element.attr("number")
  let shownNumber = fullNumber[parentNumber.len..^1]
  let margin = intToStr((indent - 1) * indentWidth)

  htmlBody.add("<p id=\"_" & fullNumber &
    "\" style=\"display: block; margin-left: " & margin & "%;\">" &
    shownNumber & " ")
  faLinks[fullNumber] = fullNumber

  for node in element:
    case node.kind
    of xnElement:
      case node.tag
      of "xref":
        # Cross reference: link to the anchor named after the referenced number.
        let target = node.attr("refnumber")
        htmlBody.add("<a href=\"" & "#_" & target & "\">" & innerText(node) &
          "</a>")
        if not faLinks.hasKey(target):
          faLinks[target] = ""
      of "subsection":
        convertSubsectionToHtml(node, fullNumber, htmlToc, htmlBody,
                                indent + 1, faLinks)
      of "histories", "catchline", "tab":
        echo "skipping " & node.tag
      else:
        echo "Unrecognized node type: " & node.tag
    of xnText:
      htmlBody.add(innerText(node) & " ")
    else:
      discard

  htmlBody.add("</p>\n")
proc convertSectionToHtml(element: XmlNode, htmlToc: var string, htmlBody: var string, faLinks: var StringTableRef) =
  ## Emits a level-3 heading for `element` and then converts each direct
  ## `subsection` child. When there is no such child, the element itself is
  ## rendered as a single subsection.
  generateHtmlHeading(element, 3, htmlToc, htmlBody, faLinks)

  let sectionNumber = element.attr("number")
  var sawSubsection = false
  for node in element.items:
    if node.kind != xnElement or node.tag != "subsection":
      continue
    convertSubsectionToHtml(node, sectionNumber, htmlToc, htmlBody, 3, faLinks)
    sawSubsection = true

  if not sawSubsection:
    # treat as a subsection if we didn't find any subsections
    convertSubsectionToHtml(element, "", htmlToc, htmlBody, 3, faLinks)
proc convertChapterToHtml(element: XmlNode, htmlToc: var string, htmlBody: var string, faLinks: var StringTableRef) =
## Converts a `chapter` element: emits a level-1 heading, converts every
## `section` element returned by `findAll(element, "section")`, and appends
## an `<hr/>` line to `htmlBody`.
generateHtmlHeading(element, 1, htmlToc, htmlBody, faLinks)
for section in findAll(element, "section"):
convertSectionToHtml(section, htmlToc, htmlBody, faLinks)
# NOTE(review): indentation is not visible here; the rule is presumably
# appended once after the loop rather than after each section - confirm.
htmlBody &= r"<hr/>" & "\n"
proc convertPartToHtml(element: XmlNode, htmlToc: var string, htmlBody: var string, faLinks: var StringTableRef) =
## Converts a `part` element: emits a level-2 heading, converts every
## `section` element returned by `findAll(element, "section")`, and appends
## a `<br/>` line to `htmlBody`.
generateHtmlHeading(element, 2, htmlToc, htmlBody, faLinks)
for section in findAll(element, "section"):
convertSectionToHtml(section, htmlToc, htmlBody, faLinks)
# NOTE(review): indentation is not visible here; the break is presumably
# appended once after the loop rather than after each section - confirm.
htmlBody &= r"<br/>" & "\n"
proc convertXmlToHtml(statute: string, htmlToc: var string, htmlBody: var string, faLinks: var StringTableRef) =
  ## Parses `statute` as XML and dispatches on the root element's tag
  ## (`chapter`, `part` or `section`) to the matching converter, which appends
  ## to `htmlToc` / `htmlBody` and records anchors in `faLinks`.
  ## Any other root is reported with `echo` and otherwise ignored.
  let root = parseXml(statute)

  if root.kind != xnElement:
    echo "Unknown root element kind: "
    echo root.kind
    return

  let rootTag = root.tag
  if rootTag == "chapter":
    convertChapterToHtml(root, htmlToc, htmlBody, faLinks)
  elif rootTag == "part":
    convertPartToHtml(root, htmlToc, htmlBody, faLinks)
  elif rootTag == "section":
    convertSectionToHtml(root, htmlToc, htmlBody, faLinks)
  else:
    echo "Unknown root element tag: " & rootTag
proc writeHtml(fileName: string, htmlToc: string, htmlBody: string, page_title: string) =
  ## Writes the complete HTML document to `fileName`: a fixed head (with
  ## `page_title` and today's date in the `<title>`), the stylesheet, a
  ## top-level heading, a "Generated" date line, the project links, then
  ## `htmlToc`, `htmlBody` and the closing tags.
  const headOpen = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
"""
  const styleAndBodyOpen = """
<style>
html,body{font-size:100%}
h1{font-size:2.125em; margin-left: 0%}
h2{font-size:1.6875em; margin-left: 3%}
h3{font-size:1.375em; margin-left: 6%}
h4{font-size:1.125em; margin-left: 9%}
h5{font-size:1.125em; margin-left: 12%}
h6{font-size:1em; margin-left: 15%}
hr{border:solid #ddddd8;border-width:1px 0 0;clear:both;margin:1.25em 0 1.1875em;height:0}
</style>
</head>
<body>
"""
  const projectLinks = """
<p> See <a href="https://manithree.gitlab.io/utfastatutes/">https://manithree.gitlab.io/utfastatutes/</a> for the latest version, or
<a href="https://gitlab.com/manithree/utfastatutes">https://gitlab.com/manithree/utfastatutes</a> to report defects or make suggestions.</p>
<hr/>
"""
  const closing = """
</body>
</html>
"""

  var document = headOpen
  document.add("<title>" & page_title & " " & getDateStr() & "</title>")
  document.add(styleAndBodyOpen)
  document.add("<h1>" & page_title & "</h1>")
  document.add("<p>Generated " & getDateStr() & "</p>")
  document.add(projectLinks)
  document.add(htmlToc)
  document.add(htmlBody)
  document.add(closing)

  writeFile(fileName, document)
proc fixLinks(htmlToc: var string, htmlBody: var string, faLinks: var StringTableRef) =
  ## Reports (via `echo`) every link in `htmlBody` whose target is not
  ## registered in `faLinks` as an anchor of the generated document, i.e.
  ## where `faLinks[target] != target` or the target is missing altogether.
  ##
  ## Nothing is rewritten yet: `htmlToc`, `htmlBody` and `faLinks` are left
  ## unchanged.
  # this is where the external links are fixed up if I can figure out how
  # to reliably link to le.utah.gov
  echo "Fixing links"

  # Compiled once, not once per line.
  let hrefPattern = re("href=\"([^\"]+)\"")

  for line in splitLines(htmlBody):
    # A paragraph can carry several cross references on one line, so walk
    # every match instead of only the first.
    for m in line.findIter(hrefPattern):
      let target = m.captures[0]
      # Generated hrefs look like "#_<number>"; strip the anchor prefix.
      let statute = if target.startsWith("#_"): target[2..^1] else: target
      # `[]` on a string table raises KeyError for unknown keys, so test
      # membership first.
      if not faLinks.hasKey(statute) or faLinks[statute] != statute:
        echo "Needs fixing: " & statute
when isMainModule:
  # Entry point: read the JSON settings file named on the command line,
  # download the current version of every configured statute page, save each
  # download in the cwd and, for xml -> html, build one combined HTML page.
  if paramCount() < 1:
    quit("Usage: utfastatutes_nim <settings.json>", QuitFailure)

  var htmlToc = ""
  var htmlBody = ""
  var faLinks = newStringTable()

  # Read the json config file
  let settings = parseJson(readFile(paramStr(1)))
  let codes = settings["code"]
  let title = settings["title"].getStr()
  let fileName = settings["filename"].getStr()
  let downloadFormat = settings["download_format"].getStr()
  # HTML is only produced from XML downloads.
  let makeHtml = downloadFormat == "xml" and
    settings["output_format"].getStr() == "html"

  # Compiled once; the landing page names the current version in a line
  # matching this pattern.
  let versionPattern = re"var versionDefault=""(.*)"";"

  var client = newHttpClient()
  try:
    for code in codes:
      let pageUrl = code.getStr()
      let pg = client.getContent(pageUrl)

      # couldn't get multi-line regex to work, but this is probably more
      # efficient, anyway:
      var version = ""
      for line in splitLines(pg):
        let m = line.match(versionPattern)
        if m.isSome:
          version = m.get.captures[0]
          break
      if version.len == 0:
        # Without a version the download URL would be meaningless; fail with
        # a clear message rather than requesting "<base>/.<format>".
        raise newException(ValueError,
          "No versionDefault found in page: " & pageUrl)

      # download the base versioned file; the slice keeps the trailing '/',
      # so no extra separator is added
      let urlBase = pageUrl[0 .. rfind(pageUrl, '/')]
      let statute = client.getContent(urlBase & version & "." & downloadFormat)

      # save the file (as downloaded)
      writeFile(version & "." & downloadFormat, statute)

      if makeHtml:
        convertXmlToHtml(statute, htmlToc, htmlBody, faLinks)
  finally:
    client.close()

  if makeHtml:
    fixLinks(htmlToc, htmlBody, faLinks)
    writeHtml(fileName, htmlToc, htmlBody, title)
{
"output_format": "html",
"download_format": "xml",
"convert_cmd": "unused",
"title": "Utah Firearm Statutes",
"filename": "UtahFirearmStatutes.html",
"code": [
"https://le.utah.gov/xcode/ArticleI/Article_I,_Section_6.html",
"https://le.utah.gov/xcode/Title10/Chapter8/10-8-S47.html",
"https://le.utah.gov/xcode/Title29/Chapter2/29-2-S103.html",
"https://le.utah.gov/xcode/Title34/Chapter45/34-45-S103.html",
"https://le.utah.gov/xcode/Title53/Chapter2A/53-2a-S214.html",
"https://le.utah.gov/xcode/Title53/Chapter5A/53-5a.html",
"https://le.utah.gov/xcode/Title53B/Chapter3/53B-3-S103.html",
"https://le.utah.gov/xcode/Title76/Chapter1/76-1-S105.html",
"https://le.utah.gov/xcode/Title76/Chapter2/76-2-S406.html",
"https://le.utah.gov/xcode/Title76/Chapter2/76-2-S407.html",
"https://le.utah.gov/xcode/Title76/Chapter6/76-6-S206.html",
"https://le.utah.gov/xcode/Title76/Chapter8/76-8-S311.1.html",
"https://le.utah.gov/xcode/Title76/Chapter8/76-8-S311.3.html",
"https://le.utah.gov/xcode/Title76/Chapter10/76-10-P5.html",
"https://le.utah.gov/xcode/Title78A/Chapter2/78A-2-S203.html",
"https://le.utah.gov/xcode/Title78B/Chapter3/78B-3-S110.html"
]
}
{
"output_format": "html",
"download_format": "xml",
"convert_cmd": "unused",
"title": "Utah Bicycle Statutes",
"filename": "UtahBikeStatutes.html",
"code": [
"https://le.utah.gov/xcode/Title41/Chapter6a/41-6a-S102.html",
"https://le.utah.gov/xcode/Title41/Chapter6a/41-6a-S208.html",
"https://le.utah.gov/xcode/Title41/Chapter6a/41-6a-S305.html",
"https://le.utah.gov/xcode/Title41/Chapter6A/41-6a-S526.html",
"https://le.utah.gov/xcode/Title41/Chapter6a/41-6a-S701.html",
"https://le.utah.gov/xcode/Title41/Chapter6a/41-6a-S706.5.html",
"https://le.utah.gov/xcode/Title41/Chapter6a/41-6a-S710.html",
"https://le.utah.gov/xcode/Title41/Chapter6a/41-6a-S801.html",
"https://le.utah.gov/xcode/Title41/Chapter6a/41-6a-S804.html",
"https://le.utah.gov/xcode/Title41/Chapter6a/41-6a-S902.html",
"https://le.utah.gov/xcode/Title41/Chapter6A/41-6a-S1002.html",
"https://le.utah.gov/xcode/Title41/Chapter6a/41-6a-S1102.html",
"https://le.utah.gov/xcode/Title41/Chapter6a/41-6a-S1104.html",
"https://le.utah.gov/xcode/Title41/Chapter6A/41-6a-S1105.html",
"https://le.utah.gov/xcode/Title41/Chapter6a/41-6a-S1106.html",
"https://le.utah.gov/xcode/Title41/Chapter6a/41-6a-S1107.html",
"https://le.utah.gov/xcode/Title41/Chapter6a/41-6a-S1108.html",
"https://le.utah.gov/xcode/Title41/Chapter6a/41-6a-S1109.html",
"https://le.utah.gov/xcode/Title41/Chapter6a/41-6a-S1110.html",
"https://le.utah.gov/xcode/Title41/Chapter6a/41-6a-S1111.html",
"https://le.utah.gov/xcode/Title41/Chapter6a/41-6a-S1112.html",
"https://le.utah.gov/xcode/Title41/Chapter6a/41-6a-S1113.html",
"https://le.utah.gov/xcode/Title41/Chapter6a/41-6a-S1114.html",
"https://le.utah.gov/xcode/Title41/Chapter6A/41-6a-S1302.html",
"https://le.utah.gov/xcode/Title41/Chapter6a/41-6a-S1402.html",
"https://le.utah.gov/xcode/Title41/Chapter6a/41-6a-S1603.html"
]
}
-d:ssl
--debugger:native
# Utah {Generic} Statutes Document Generator
There are lots of statutes at le.utah.gov that pertain to firearms: ownership,
selling, buying, carrying, defensive use, etc. Sometimes you might (I do,
anyway) want to have just the firearms-related statutes available offline. This
program aims to accomplish that.
And in its latest version, it is now a more generic html booklet maker for
subsets of Utah code. Or, a specialized le.utah.gov scraper.
This latest version simplifies the settings file format, and includes one for
most bicycle-related statutes.
## Current
This is a standalone nim program whose primary purpose is to download the
latest version of statute sections (chapter, parts, sections, or subsections)
that relate to firearms, and optionally transform them.
It is configured with the settings `.json` file. Allowed values for the
`download_format` are:
xml
rtf
The only supported `output_format` is `html`. The `output_format` will be
ignored unless the `download_format` is `xml`.
When `download_format` is `xml` and `output_format` is `html` a single html page
will be generated containing all the statutes, and a linked table of contents.
The xml files and the generated html file are saved in the cwd where the program
is run.
Two new required settings are `title` for the html title element and initial
H1, and `filename` for the destination html file.
## Building
To build from source:
git clone https://gitlab.com/manithree/utfastatutes.git
cd utfastatutes
nimble build
Then to run it, just:
./utfastatutes_nim settings.json
## History
I originally wrote a groovy script that downloaded the .rtf files from
le.utah.gov and used libreoffice and OoOPy to put it all together in one
document. I discovered later that I hadn't properly reverse engineered
le.utah.gov and it didn't always download the latest versions. It also didn't
have a table of contents.
So, I re-wrote it in rust, and fixed it so that it always got the latest
version of the statute. I couldn't find a good ODF library in rust, but there's
a really good Java one, so I wrote another groovy script to take xml files
downloaded by the rust program and generate a libreoffice document with a table
of contents.
The next step was writing a groovy script to convert the xml to very simple
html with a table of contents. Then you don't need google doc/drive to view the
document, and you don't need any libreoffice tools at document generation time.
All that happened over the last several years on github.
This is a re-write in Nim (yeah, this project is my excuse to learn a new
language occasionally), and combines the download and html generation into one
package. I strongly prefer the html output, so I didn't include the ODF output
in the nim re-write. The latest version will download any format you configure
it for, but will only generate HTML (and only from XML).
MIT License
Copyright (c) 2019 Barry Roberts
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.