Relates to discussion #353 (filtering) and #505 (structured CLI output).
The path filtering is invoked as `pijul log -- <paths>` and will log only the
changes that touched the listed paths.
The JSON output feature is invoked as `pijul log --output-format=json`.
Since it internally uses serde, this can be extended to any serde-supported
target format.
The implementation creates two new structs, LogIterator and LogEntry;
the former holds the state that was previously loose in Log::run,
and provides a for_each method that can be used to map over the log entries
efficiently while maximizing code reuse (where efficiently means
only one log entry is held in memory at a time).
OU6JOR3CDZTH2H3NTGMV3WDIAWPD3VEJI7JRY3VJ7LPDR3QOA52QC RUBBHYZ7MCLKJIHZ3EWEC3JR3FSKOU4T2NH7KRBG7ECAU4JF3LUAC UW3KU7DHQDCI6GRYI6FI5S6PCLKNCN4QPSAU7WJROY2D5MPQYRAQC SMMBFECLGSUKRZW5YPOQPOQCOY2CH2OTZXBSZ3KG2N3J3HQZ5PSAC SXEYMYF7P4RZMZ46WPL4IZUTSQ2ATBWYZX7QNVMS3SGOYXYOHAGQC L4JXJHWXYNCL4QGJXNKKTOKKTAXKKXBJUUY7HFZGEUZ5A2V5H34QC CCLLB7OIFNFYJZTG3UCI7536TOCWSCSXR67VELSB466R24WLJSDAC I52XSRUH5RVHQBFWVMAQPTUSPAJ4KNVID2RMI3UGCVKFLYUO6WZAC A3RM526Y7LUXNYW4TL56YKQ5GVOK2R5D7JJVTSQ6TT5MEXIR6YAAC JL4WKA5PBKXRNAMETYO4I52QKASQ3COYHH2JKGA7W5YLIRZZH53AC 5OGOE4VWS5AIG4U2UYLLIGA3HY6UB7SNQOSESHNXBLET3VQXFBZAC I7VL7VPZV2NKOZRKBWWEHFOGNGGTYLPONHABVJ767D6HPJJNY5RAC Y6EVFMTA6FOH3OQH6QCSWMI3F6SYZT2FSHO6GF4M3ICENDCWFM4QC 2K7JLB4Z7BS5VFNWD4DO3MKYU7VNPA5MTVHVSDI3FQZ5ICM6XM6QC YN63NUZO4LVJ7XPMURDULTXBVJKW5MVCTZ24R7Z52QMHO3HPDUVQC EUZFFJSOWV4PXDFFPDAFBHFUUMOFEU6ST7JH57YYRRR2SEOXLN6QC I24UEJQLCH2SOXA4UHIYWTRDCHSOPU7AFTRUOTX7HZIAV4AZKYEQC PH7B6I3U5XCACAX6VX3ZDJD2DQOQS7725R6CTOATNC26NP4VPUFQC PSKXR4QEPPVJZR777HW67IEHUPGZB44MFCNQ2KUS422Q3W22IQWAC GURIBVW66JDQK3SJZRGVJ2MQLMT7JD4KLI5QPQZGPAL7WH3T6T4AC #[clap(long = "output-format")]output_format: Option<String>,/// Filter log output, showing only log entries that touched the specified/// files. Accepted as a list of paths relative to your current directory./// Currently, filters can only be applied when logging the channel that's/// in use.#[clap(last = true)]filters: Vec<String>,
impl Log {pub fn run(self) -> Result<(), anyhow::Error> {let repo = Repository::find_root(self.repo_path)?;
// A lot of error-handling noise here, but since we're dealing with// a user-command and a bunch of file-IO/path manipulation it's// probably necessary for the feedback to be good.fn get_inodes(txn: &impl libpijul::pristine::TreeTxnT,repo_path: &Path,pats: &[String],) -> Result<Vec<libpijul::Inode>, anyhow::Error> {let mut inodes = Vec::new();for pat in pats {let canon_path = match Path::new(pat).canonicalize() {Err(e) if matches!(e.kind(), std::io::ErrorKind::NotFound) => {bail!("pijul log couldn't find a file or directory corresponding to `{}`",pat)}Err(e) => return Err(e.into()),Ok(p) => p,};match canon_path.strip_prefix(repo_path).map(|p| p.to_str()) {// strip_prefix error is if repo_path is not a prefix of canon_path,// which would only happen if they pased in a filter path that's not// in the repository.Err(_) => bail!("pijul log couldn't assemble file prefix for pattern `{}`; \{} was not a file in the repository at {}",pat,canon_path.display(),repo_path.display()),// PathBuf.to_str() returns none iff the path contains invalid UTF-8.Ok(None) => bail!("pijul log couldn't assemble file prefix for pattern `{}`; \the path contained invalid UTF-8",pat),Ok(Some(s)) => match libpijul::fs::find_inode(txn, s) {Err(e) => bail!("pijul log couldn't assemble file prefix for pattern `{}`; \no Inode found for the corresponding path. Internal error: {:?}",pat,e),Ok(inode) => {inodes.push(inode);}},};}log::debug!("log filters: {:#?}\n", pats);Ok(inodes)}/// Given a list of path filters which represent the files/directories for which/// the user wants to see the logs, find the subset of relevant change hashes.fn filtered_hashes<T: TreeTxnT + GraphTxnT + DepsTxnT>(txn: &T,path: &Path,filters: &[String],) -> Result<HashSet<libpijul::Hash>, anyhow::Error> {let inodes = get_inodes(txn, path, filters)?;let mut hashes = HashSet::<libpijul::Hash>::new();for inode in inodes {// The Position<ChangeId> for the file Inode.let inode_position = match txn.get_inodes(&inode, None)? 
{None => bail!("Failed to get matching inode: {:?}", inode),Some(p) => p,};for pair in txn.iter_touched(inode_position)? {let (position, touched_change_id) = pair?;// Push iff the file ChangeId for this element matches that of the file Inodeif &position.change == &inode_position.change {match txn.get_external(touched_change_id)? {Some(ser_h) => {hashes.insert(libpijul::Hash::from(*ser_h));}_ => {log::error!("`get_external` failed to retrieve full hash for ChangeId {:?}",touched_change_id);bail!("Failed to retrieve full hash for {:?}", touched_change_id)}}} else {// We've gone past the relevant subset of changes in the iterator.break;}}}Ok(hashes)}/// A single log entry created by [`LogIterator`]. The fields are/// all `Option<T>` so that users can more precisely choose what/// data they want.////// The implementaiton of [`std::fmt::Display`] is the standard method/// of pretty-printing a `LogEntry` to the terminal.#[derive(Serialize)]struct LogEntry {#[serde(skip_serializing_if = "Option::is_none")]hash: Option<String>,#[serde(skip_serializing_if = "Option::is_none")]state: Option<String>,#[serde(skip_serializing_if = "Option::is_none")]authors: Option<Vec<String>>,#[serde(skip_serializing_if = "Option::is_none")]timestamp: Option<chrono::DateTime<chrono::offset::Utc>>,#[serde(skip_serializing_if = "Option::is_none")]message: Option<String>,#[serde(skip_serializing_if = "Option::is_none")]description: Option<String>,}/// The standard pretty-printimpl std::fmt::Display for LogEntry {fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {if let Some(ref h) = self.hash {writeln!(f, "Change {}", h)?;}if let Some(ref authors) = self.authors {write!(f, "Author: ")?;let mut is_first = true;for a in authors.iter() {if is_first {is_first = false;write!(f, "{}", a)?;} else {write!(f, ", {}", a)?;}}// Write a linebreak after finishing the list of authors.writeln!(f)?;}if let Some(ref timestamp) = self.timestamp {writeln!(f, "Date: {}", timestamp)?;}if let Some(ref 
mrk) = self.state {writeln!(f, "State: {}", mrk)?;}if let Some(ref message) = self.message {writeln!(f, "\n {}\n", message)?;}if let Some(ref description) = self.description {writeln!(f, "\n {}\n", description)?;}Ok(())}}/// Contains state needed to produce the sequence of [`LogEntry`] items/// that are to be logged. The implementation of `TryFrom<Log>` provides/// a fallible way of creating one of these from the CLI's [`Log`] structure.////// The two main things this provides are an efficient/streaming implementation/// of [`serde::Serialize`], and an implementation of [`std::fmt::Display`] that/// does the standard pretty-printing to stdout.////// The [`LogIterator::for_each`] method lets us reuse the most code while providing both/// pretty-printing and efficient serialization; we can't easily do this with/// a full implementation of Iterator because serde's serialize method requires/// self to be an immutable reference.struct LogIterator {txn: Txn,changes: libpijul::changestore::filesystem::FileSystem,cmd: Log,repo_path: PathBuf,id_path: PathBuf,channel_ref: ChannelRef<Txn>,limit: usize,offset: usize,}/// This implementation of Serialize is hand-rolled in order/// to allow for greater re-use and efficiency.impl Serialize for LogIterator {fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>whereS: Serializer,{let mut seq = serializer.serialize_seq(None)?;match self.for_each(|entry| seq.serialize_element(&entry)) {Ok(_) => seq.end(),Err(anyhow_err) => Err(serde::ser::Error::custom(format!("{}", anyhow_err))),}}}/// Pretty-prints all of the requested log entries in the standard/// user-facing format.impl std::fmt::Display for LogIterator {fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {match self.for_each(|entry| write!(f, "{}", entry)) {Err(e) => {log::error!("LogIterator::Display: {}", e);Err(std::fmt::Error)}_ => Ok(()),}}}impl TryFrom<Log> for LogIterator {type Error = anyhow::Error;fn try_from(cmd: Log) -> Result<LogIterator, 
Self::Error> {let repo = Repository::find_root(cmd.repo_path.clone())?;let repo_path = repo.path.clone();
let channel = if let Some(channel) = txn.load_channel(channel_name)? {
// The only situation that's disallowed is if the user's trying to apply// path filters AND get the logs for a channel other than the one they're// currently using (where using means the one that comprises the working copy)if !cmd.filters.is_empty()&& !(channel_name == txn.current_channel().unwrap_or(crate::DEFAULT_CHANNEL)){bail!("Currently, log filters can only be applied to the channel currently in use.")}let channel_ref = if let Some(channel) = txn.load_channel(channel_name)? {
let mut stdout = std::io::stdout();let limit = self.limit.unwrap_or(std::usize::MAX);let offset = self.offset.unwrap_or(0);if self.hash_only {for h in txn.reverse_log(&*channel.read(), None)?.skip(offset).take(limit){let h: libpijul::Hash = (h?.1).0.into();writeln!(stdout, "{}", h.to_base32())?
let limit = cmd.limit.unwrap_or(std::usize::MAX);let offset = cmd.offset.unwrap_or(0);let mut id_path = repo.path.join(libpijul::DOT_DIR);id_path.push("identities");Ok(Self {txn,cmd,changes,repo_path,id_path,channel_ref,limit,offset,})}}impl LogIterator {/// Call `f` on each [`LogEntry`] in a [`LogIterator`].////// The purpose of this is to let us execute a function over the log entries/// without having to duplicate the iteration/filtering logic or/// having to collect all of the elements first.fn for_each<A, E>(&self,mut f: impl FnMut(LogEntry) -> Result<A, E>,) -> Result<(), anyhow::Error>whereE: std::fmt::Display,{// A cache of authors to keys. Prevents us from having to do// a lot of file-io for looking up the same author multiple times.let mut authors = HashMap::new();let mut id_path = self.id_path.clone();// If the user applied path filters, figure out what change hashes// are to be logged.let mut requested_hashes = filtered_hashes(&self.txn,self.repo_path.as_ref(),self.cmd.filters.as_slice(),)?;// Get the (Hash, Merkle) pairs for the portion of reverse_log// that are between offset and limit.let hs = self.txn.reverse_log(&*self.channel_ref.read(), None)?.skip(self.offset).take(self.limit).map(|res| {res.map(|(_, (ser_h, ser_m))| {(libpijul::Hash::from(ser_h), libpijul::Merkle::from(ser_m))})});for pr in hs {let (h, mrk) = pr?;if (self.cmd.filters.is_empty()) || requested_hashes.remove(&h) {// If there were no path filters applied, OR is this was one of the hashes// marked by the file filters that were appliedlet entry = self.mk_log_entry(&mut authors, &mut id_path, h, Some(mrk))?;if let Err(e) = f(entry) {return Err(anyhow::Error::msg(format!("{}", e)));}} else if requested_hashes.is_empty() {// If the user applied path filters, but the relevant change hashes// have been exhausted, we can break early.break;} else {// The user applied path filters; this wasn't a hit, but// there are still hits to be logged.continue;
for h in txn.reverse_log(&*channel.read(), None)?.skip(offset).take(limit){let (h, mrk) = h?.1;let h: libpijul::Hash = h.into();let mrk: libpijul::Merkle = mrk.into();let header = changes.get_header(&h.into())?;writeln!(stdout, "Change {}", h.to_base32())?;write!(stdout, "Author: ")?;let mut is_first = true;for mut auth in header.authors.into_iter() {let auth = if let Some(k) = auth.0.remove("key") {match authors.entry(k) {Entry::Occupied(e) => e.into_mut(),Entry::Vacant(e) => {let mut id = None;id_path.push(e.key());if let Ok(f) = std::fs::File::open(&id_path) {if let Ok(id_) =serde_json::from_reader::<_, super::Identity>(f){id = Some(id_)}}id_path.pop();if let Some(id) = id {e.insert(id.login)} else {let k = e.key().to_string();e.insert(k)
Ok(())}/// Create a [`LogEntry`] for a given hash.////// Most of this is just getting the right key information from either the cache/// or from the relevant file.fn mk_log_entry<'x>(&self,author_kvs: &'x mut HashMap<String, String>,id_path: &mut PathBuf,h: libpijul::Hash,m: Option<libpijul::Merkle>,) -> Result<LogEntry, anyhow::Error> {let header = self.changes.get_header(&h.into())?;let authors = header.authors.into_iter().map(|mut auth| {let auth = if let Some(k) = auth.0.remove("key") {match author_kvs.entry(k) {Entry::Occupied(e) => e.into_mut(),Entry::Vacant(e) => {let mut id = None;id_path.push(e.key());if let Ok(f) = std::fs::File::open(&self.id_path) {if let Ok(id_) = serde_json::from_reader::<_, super::Identity>(f) {id = Some(id_)
}writeln!(stdout)?;writeln!(stdout, "Date: {}", header.timestamp)?;if states {writeln!(stdout, "State: {}", mrk.to_base32())?;}writeln!(stdout, "\n {}\n", header.message)?;if self.descriptions {if let Some(ref descr) = header.description {writeln!(stdout, "\n {}\n", descr)?;}}
} else {auth.0.get("name").unwrap()};auth.to_owned()}).collect();Ok(LogEntry {hash: Some(h.to_base32()),state: m.map(|mm| mm.to_base32()).filter(|_| self.cmd.states),authors: Some(authors),timestamp: Some(header.timestamp),message: Some(header.message.clone()),description: header.description,})}}impl Log {// In order to accommodate both pretty-printing and efficient serialization to a serde// target format, this now delegates mostly to [`LogIterator`].pub fn run(self) -> Result<(), anyhow::Error> {let mut stdout = std::io::stdout();match self.output_format.as_ref().map(|s| s.as_str()) {Some(s) if s.eq_ignore_ascii_case("json") => {serde_json::to_writer_pretty(&mut stdout, &LogIterator::try_from(self)?)?}_ => {super::pager();LogIterator::try_from(self)?.for_each(|entry| write!(&mut stdout, "{}", entry))?