Hi guys. This is driving me crazy. I am trying to implement the Google Document AI into a website, but I have not been able. This is the code snippet I have in the backend. I do not know what I am missing because the function output in the console says: ENOENT: no such file or directory, open ‘https://9351dc3b-6723-4f46-a20e-14879b10b395.usrfiles.com/ugd/9351dc_7d9c44e33fa8468da039730ddb115f83.pdf’
If anyone can help, please let me know, thanks.
import _ from ‘lodash’ ;
const { DocumentProcessorServiceClient } = require ( ‘@google-cloud/documentai’ ). v1 ;
//---------- Google Document AI API----------//
const projectId = ‘ancient-bond-’ ;
const location = ‘us’ ;
const processorId = '20c3a49765’ ;
const filePath = ‘https://9351dc3b-6723-4f46-a20e-14879b10b395.usrfiles.com/ugd/9351dc_7d9c44e33fa8468da039730ddb115f83.pdf’ ;
const documentaiClient = new DocumentProcessorServiceClient ();
export async function quickstart () {
// The full resource name of the processor
const name = projects/ ${ projectId } /locations/ ${ location } /processors/ ${ processorId }
;
// Read the file into memory
const fs = require ( 'fs' ). promises ;
const imageFile = **await** fs . readFile ( filePath );
// Convert the image data to a Buffer and base64 encode it
const encodedImage = Buffer . from ( imageFile ). toString ( 'base64' );
const request = {
name ,
rawDocument : {
content : encodedImage ,
mimeType : 'application/pdf' ,
},
};
// Recognizes text entities in the PDF document
const [ result ] = **await** documentaiClient . processDocument ( request );
const { document } = result ;
// Get all of the document text as one big string
const { text } = document ;
// Extract shards from the text field
const getText = textAnchor => {
if (! textAnchor.textSegments || textAnchor.textSegments.length === 0 ) {
**return** '' ;
}
// First shard in document doesn't have startIndex property
const startIndex = textAnchor.textSegments [ 0 ]. startIndex || 0 ;
const endIndex = textAnchor.textSegments [ 0 ]. endIndex ;
**return** text . substring ( startIndex , endIndex );
};
// Read the text recognition output from the processor
console . log ( 'The document contains the following paragraphs:' );
const [ page1 ] = document.pages ;
const { paragraphs } = page1 ;
**for** ( const paragraph **of** paragraphs ) {
const paragraphText = getText ( paragraph.layout.textAnchor );
console . log ( `Paragraph text:\n ${ paragraphText } ` );
}
}