WM3O3HSYIYXII5QZEHMUJZ3YYIX52FVOCSFJOCR54VZ4BURLQBBAC # Packageversion = "0.1.0"author = "Barry Roberts"description = "Read Utah firearm statutes (xml) from le.utah.gov and create an html document"license = "MIT"srcDir = "src"bin = @["utfastatutes_nim"]# Dependenciesrequires "nim >= 0.19"when defined(nimdistros):import distrosif detectOs(Ubuntu):foreignDep "libssl-dev"else:foreignDep "openssl"#foreignDep "openssl"
import httpclient, nre, options, json,strutils, strformat, strtabs,xmlparser, xmltree, times, osvar indentWidth = 3proc generateHtmlHeading(element: XmlNode, indent: int, htmlToc: var string, htmlBody: var string, faLinks: var StringTableRef) =var number = element.attr("number")htmlToc &= r"<a style=""margin-left: " & intToStr((indent-1)*indentWidth) & r"%;"" href=""#_" & number & "\"" & r">" & number & " " & innerText(child(element, "catchline")) & r"</a><br/>" & "\n"faLinks[number] = numberhtmlBody &= r"<h" & intToStr(indent) & r" id=""_" & number & "\"" & r">" & number & " " & innerText(child(element, "catchline")) & r"</h" & intToStr(indent) & r">" & "\n"var effdate = child(element, "effdate")if not effdate.isNil():htmlBody &= r"<p style=""display: block; margin-left: " & intToStr((indent-1)*indentWidth) & r"%;"">(Effective " & innerText(effdate) & r")</p>" & "\n"proc convertSubsectionToHtml(element: XmlNode, parentNumber: string, htmlToc: var string, htmlBody: var string, indent: int, faLinks: var StringTableRef) =var number = element.attr("number")var dispNum = number[parentNumber.len..^1]htmlBody &= r"<p id=" & "\"_" & number & "\" " & r"style=""display: block; margin-left: " & intToStr((indent-1)*indentWidth) & r"%;"">" & dispNum & " "faLinks[number] = numberfor child in element:if child.kind() == xnElement:case child.tag():of "xref":var childNum = child.attr("refnumber")var href = r"#_" & childNumhtmlBody &= r"<a href=" & "\"" & href & "\"" & r">" & innerText(child) & r"</a>"if not faLinks.hasKey(childNum):faLinks[childNum] = ""of "subsection":convertSubsectionToHtml(child, number, htmlToc, htmlBody, indent+1, faLinks)of "histories":echo "skipping histories"of "catchline":echo "skipping catchline"of "tab":echo "skipping tab"else:echo "Unrecognized node type: " & child.tag()else:if child.kind() == xnText:htmlBody &= innerText(child) & " "htmlBody &= r"</p>" & "\n"proc convertSectionToHtml(element: XmlNode, htmlToc: var string, htmlBody: var string, faLinks: var StringTableRef) =generateHtmlHeading(element, 3, htmlToc, htmlBody, faLinks)var count = 0var number = element.attr("number")for subsection in items(element):if subsection.kind == xnElement and subsection.tag == "subsection":convertSubsectionToHtml(subsection, number, htmlToc, htmlBody, 3, faLinks)count += 1if count < 1:# treat as a subsection if we didn't find any subsectionsconvertSubsectionToHtml(element, "", htmlToc, htmlBody, 3, faLinks)proc convertChapterToHtml(element: XmlNode, htmlToc: var string, htmlBody: var string, faLinks: var StringTableRef) =generateHtmlHeading(element, 1, htmlToc, htmlBody, faLinks)for section in findAll(element, "section"):convertSectionToHtml(section, htmlToc, htmlBody, faLinks)htmlBody &= r"<hr/>" & "\n"proc convertPartToHtml(element: XmlNode, htmlToc: var string, htmlBody: var string, faLinks: var StringTableRef) =generateHtmlHeading(element, 2, htmlToc, htmlBody, faLinks)for section in findAll(element, "section"):convertSectionToHtml(section, htmlToc, htmlBody, faLinks)htmlBody &= r"<br/>" & "\n"proc convertXmlToHtml(statute: string, htmlToc: var string, htmlBody: var string, faLinks: var StringTableRef) =var tree = parseXml(statute)if tree.kind() == xnELement:case tree.tag():of "chapter":convertChapterToHtml(tree, htmlToc, htmlBody, faLinks)of "part":convertPartToHtml(tree, htmlToc, htmlBody, faLinks)of "section":convertSectionToHtml(tree, htmlToc, htmlBody, faLinks)else:echo "Unknown root element tag: " & tree.tag()else:echo "Unknown root element kind: "echo tree.kind()proc writeHtml(fileName: string, htmlToc: string, htmlBody: string, page_title: string) =var preamble = """<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1.0">"""preamble &= fmt"<title>{page_title} {getDateStr()}</title>"preamble &= """<style>html,body{font-size:100%}h1{font-size:2.125em; margin-left: 0%}h2{font-size:1.6875em; margin-left: 3%}h3{font-size:1.375em; margin-left: 6%}h4{font-size:1.125em; margin-left: 9%}h5{font-size:1.125em; margin-left: 12%}h6{font-size:1em; margin-left: 15%}hr{border:solid #ddddd8;border-width:1px 0 0;clear:both;margin:1.25em 0 1.1875em;height:0}</style></head><body>"""preamble &= fmt"<h1>{page_title}</h1>"preamble &= fmt"<p>Generated {getDateStr()}</p>"preamble &= """<p> See <a href="https://manithree.gitlab.io/utfastatutes/">https://manithree.gitlab.io/utfastatutes/</a> for the latest version, or<a href="https://gitlab.com/manithree/utfastatutes">https://gitlab.com/manithree/utfastatutes</a> to report defects or make suggestions.</p><hr/>"""var post = """</body></html>"""writeFile(fileName, preamble & htmlToc & htmlBody & post )proc fixLinks(htmlToc: var string, htmlBody: var string, faLinks: var StringTableRef) =# this is where the external links are fixed up if I can figure out how# to reliably link to le.utah.govecho "Fixing links"for line in splitlines(htmlBody):# TODO this only finds the first occurencevar m = line.find(re(r"href=""([^""]+)" & "\"" & r".?"))if isSome(m):#echo m.get.captures[0][2..^1]let statute = m.get.captures[0][2..^1]if faLinks[statute] != statute:echo "Needs fixing: " & statutewhen isMainModule:var htmlToc = ""var htmlBody = ""var faLinks = newStringTable()# Read the json config filelet settings = parseJson(readFile(paramStr(1)))let codes = settings["code"]let title = settings["title"].getStr()let fileName = settings["filename"].getStr()var client = newHttpClient()for code in codes:var pg = client.getContent(code.getStr())# couldn't get multi-line regex to work, but this is probably more# efficient, anyway:var version = ""for line in splitlines(pg):var m = line.match(re"var versionDefault=""(.*)"";")if isSome(m):version = m.get.captures[0]break# download the base versioned filevar urlBase =code.getStr()[0..rfind(code.getStr(), '/')]var statute = client.getContent(urlBase & "/" & version & "." & settings["download_format"].getStr())# save the file (as downloaded)writeFile(version & "." & settings["download_format"].getStr(), statute)if settings["download_format"].getStr() == "xml" andsettings["output_format"].getStr() == "html":convertXmlToHtml(statute, htmlToc, htmlBody, faLinks)if settings["download_format"].getStr() == "xml" andsettings["output_format"].getStr() == "html":fixLinks(htmlToc, htmlBody, faLinks)writeHtml(fileName, htmlToc, htmlBody, title)
{"output_format": "html","download_format": "xml","convert_cmd": "unused","title": "Utah Firearm Statutes","filename": "UtahFirearmStatutes.html","code": ["https://le.utah.gov/xcode/ArticleI/Article_I,_Section_6.html","https://le.utah.gov/xcode/Title10/Chapter8/10-8-S47.html","https://le.utah.gov/xcode/Title29/Chapter2/29-2-S103.html","https://le.utah.gov/xcode/Title34/Chapter45/34-45-S103.html","https://le.utah.gov/xcode/Title53/Chapter2A/53-2a-S214.html","https://le.utah.gov/xcode/Title53/Chapter5A/53-5a.html","https://le.utah.gov/xcode/Title53B/Chapter3/53B-3-S103.html","https://le.utah.gov/xcode/Title76/Chapter1/76-1-S105.html","https://le.utah.gov/xcode/Title76/Chapter2/76-2-S406.html","https://le.utah.gov/xcode/Title76/Chapter2/76-2-S407.html","https://le.utah.gov/xcode/Title76/Chapter6/76-6-S206.html","https://le.utah.gov/xcode/Title76/Chapter8/76-8-S311.1.html","https://le.utah.gov/xcode/Title76/Chapter8/76-8-S311.3.html","https://le.utah.gov/xcode/Title76/Chapter10/76-10-P5.html","https://le.utah.gov/xcode/Title78A/Chapter2/78A-2-S203.html","https://le.utah.gov/xcode/Title78B/Chapter3/78B-3-S110.html"]}
{"output_format": "html","download_format": "xml","convert_cmd": "unused","title": "Utah Bicycle Statutes","filename": "UtahBikeStatutes.html","code": ["https://le.utah.gov/xcode/Title41/Chapter6a/41-6a-S102.html","https://le.utah.gov/xcode/Title41/Chapter6a/41-6a-S208.html","https://le.utah.gov/xcode/Title41/Chapter6a/41-6a-S305.html","https://le.utah.gov/xcode/Title41/Chapter6A/41-6a-S526.html","https://le.utah.gov/xcode/Title41/Chapter6a/41-6a-S701.html","https://le.utah.gov/xcode/Title41/Chapter6a/41-6a-S706.5.html","https://le.utah.gov/xcode/Title41/Chapter6a/41-6a-S710.html","https://le.utah.gov/xcode/Title41/Chapter6a/41-6a-S801.html","https://le.utah.gov/xcode/Title41/Chapter6a/41-6a-S804.html","https://le.utah.gov/xcode/Title41/Chapter6a/41-6a-S902.html","https://le.utah.gov/xcode/Title41/Chapter6A/41-6a-S1002.html","https://le.utah.gov/xcode/Title41/Chapter6a/41-6a-S1102.html","https://le.utah.gov/xcode/Title41/Chapter6a/41-6a-S1104.html","https://le.utah.gov/xcode/Title41/Chapter6A/41-6a-S1105.html","https://le.utah.gov/xcode/Title41/Chapter6a/41-6a-S1106.html","https://le.utah.gov/xcode/Title41/Chapter6a/41-6a-S1107.html","https://le.utah.gov/xcode/Title41/Chapter6a/41-6a-S1108.html","https://le.utah.gov/xcode/Title41/Chapter6a/41-6a-S1109.html","https://le.utah.gov/xcode/Title41/Chapter6a/41-6a-S1110.html","https://le.utah.gov/xcode/Title41/Chapter6a/41-6a-S1111.html","https://le.utah.gov/xcode/Title41/Chapter6a/41-6a-S1112.html","https://le.utah.gov/xcode/Title41/Chapter6a/41-6a-S1113.html","https://le.utah.gov/xcode/Title41/Chapter6a/41-6a-S1114.html","https://le.utah.gov/xcode/Title41/Chapter6A/41-6a-S1302.html","https://le.utah.gov/xcode/Title41/Chapter6a/41-6a-S1402.html","https://le.utah.gov/xcode/Title41/Chapter6a/41-6a-S1603.html"]}
-d:ssl--debugger:native
# Utah {Generic} Statutes Document GeneratorThere are lots of statutes at le.utah.gov that pertain to firearms: ownership,selling, buying, carrying, defensive use, etc. Sometimes you might (I do,anyway) want to have just the firearms-related statutes available offline. Thisprogram aims to accomplish that.And in it's latest version, it is now a more generic html booklet maker forsubsets of Utah code. Or, a specialized le.utah.gov scraper.This latest version simplifies the settings file format, and includes one formost bicycle-related statutes.## CurrentThis is a standalone nim program that's primary purpose is to download thelatest version of statute sections (chapter, parts, sections, or subsections)that relate to firearms, and optionally transform them.It is configured with the settings `.json` file. Allowed values for the`download_format` are:xmlrtfThe only supported `output_format` is `html`. The `output_format` will beignored unless the `download_format` is `xml`.When `download_format` is `xml` and `output_format`is `html` a single html pagewill be generated containing all the statues, and a linked table of contents.The xml files and the generated html file are saved in the cwd where the programis run.Two new required settings are `title` for the html title element and initialH1, and `filename` for the destination html file.## BuildingTo build from source:git clone https://gitlab.com/manithree/utfastatutes.gitcd utfastatutesnimble buildThen to run it, just:./utfastatutes_nim settings.json## HistoryI originally wrote a groovy script that downloaded the .rtf files fromle.utah.gov and used libreoffice and OoOPy to put it all together in onedocument. I discovered later that I hadn't properly reverse engineeredle.utah.gov and it didn't always download the latest versions. It also didn'thave a table of contents.So, I re-wrote it in rust, and fixed it so it that it always got the latestversion of the statute. I couldn't find a good ODF library in rust, but there'sa really good Java one, so I wrote another groovy script to take xml filesdownloaded by the rust program and generate a libreoffice document with a tableof contents.The next step was writing a groovy script to convert the xml to very simplehtml with a table of contents. Then you don't need google doc/drive to view thedocument, and you don't need any libreoffice tools at document generation time.All that happened over the last several years on github.This is a re-write in Nim (yeah, this project is my excuse to learn a newlanguage occasionally), and combine the download and html generation into onepackage. I strongly prefer the html output, so I didn't include the ODF outputin the nim re-write. The latest version will download any format you configureit for, but will only generate HTML (and only from XML).
MIT LicenseCopyright (c) 2019 Barry RobertsPermission is hereby granted, free of charge, to any person obtaining a copyof this software and associated documentation files (the "Software"), to dealin the Software without restriction, including without limitation the rightsto use, copy, modify, merge, publish, distribute, sublicense, and/or sellcopies of the Software, and to permit persons to whom the Software isfurnished to do so, subject to the following conditions:The above copyright notice and this permission notice shall be included in allcopies or substantial portions of the Software.THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS ORIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THEAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHERLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THESOFTWARE.